2008-11-05 18:16:39

by Harvey Harrison

[permalink] [raw]
Subject: [RFC-PATCH 1/5] unaligned: introduce common header

There are two common cases in the kernel, one where unaligned access
is OK for an arch and one where the arch uses a packed-struct for
the native endianness and opencoded C byteshifting for the other
endianness. Consolidate these two implementations in asm-generic/unaligned.h

Arches that require no special handling of unaligned access can define
_UNALIGNED_ACCESS_OK in their asm/unaligned.h before including the generic
version.

Signed-off-by: Harvey Harrison <[email protected]>
---
Patches 1-3 will have _zero_ functional change, it just consolidates the implementation
in one header rather than the many under linux/unaligned/* and only moves the arches
for which unaligned access is OK or is already using the packed-struct implementation.

Patch 4 will need input from the arch maintainers and isn't meant to be applied immediately,

The memmove-based arches (m32r, xtensa, h8300) are likely going to be fine with this change
barring compiler bugs that made them go with memmove in the first place.

ARM still needs further investigation as to the impact on code generation, but is included
here so that people can try it if possible

Patch 5 is just a removal of the old headers.

include/asm-generic/unaligned.h | 323 +++++++++++++++++++++++++++++++++++++++
1 files changed, 323 insertions(+), 0 deletions(-)

diff --git a/include/asm-generic/unaligned.h b/include/asm-generic/unaligned.h
new file mode 100644
index 0000000..583433f
--- /dev/null
+++ b/include/asm-generic/unaligned.h
@@ -0,0 +1,323 @@
+#ifndef _ASM_GENERIC_UNALIGNED_H
+#define _ASM_GENERIC_UNALIGNED_H
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+#ifdef _UNALIGNED_ACCESS_OK
+
+static inline u16 get_unaligned_le16(const void *p)
+{
+ return le16_to_cpup(p);
+}
+
+static inline u32 get_unaligned_le32(const void *p)
+{
+ return le32_to_cpup(p);
+}
+
+static inline u64 get_unaligned_le64(const void *p)
+{
+ return le64_to_cpup(p);
+}
+
+static inline u16 get_unaligned_be16(const void *p)
+{
+ return be16_to_cpup(p);
+}
+
+static inline u32 get_unaligned_be32(const void *p)
+{
+ return be32_to_cpup(p);
+}
+
+static inline u64 get_unaligned_be64(const void *p)
+{
+ return be64_to_cpup(p);
+}
+
+static inline void put_unaligned_le16(u16 val, void *p)
+{
+ *(__le16 *)p = cpu_to_le16(val);
+}
+
+static inline void put_unaligned_le32(u32 val, void *p)
+{
+ *(__le32 *)p = cpu_to_le32(val);
+}
+
+static inline void put_unaligned_le64(u64 val, void *p)
+{
+ *(__le64 *)p = cpu_to_le64(val);
+}
+
+static inline void put_unaligned_be16(u16 val, void *p)
+{
+ *(__be16 *)p = cpu_to_be16(val);
+}
+
+static inline void put_unaligned_be32(u32 val, void *p)
+{
+ *(__be32 *)p = cpu_to_be32(val);
+}
+
+static inline void put_unaligned_be64(u64 val, void *p)
+{
+ *(__be64 *)p = cpu_to_be64(val);
+}
+
+#else /* _UNALIGNED_ACCESS_OK */
+
+struct __una_u16 { u16 x __attribute__((packed)); };
+struct __una_u32 { u32 x __attribute__((packed)); };
+struct __una_u64 { u64 x __attribute__((packed)); };
+
+static inline u16 __get_una_le16(const u8 *p)
+{
+ return p[0] | p[1] << 8;
+}
+
+static inline u32 __get_una_le32(const u8 *p)
+{
+ return p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24;
+}
+
+static inline u64 __get_una_le64(const u8 *p)
+{
+ return ((u64)__get_una_le32(p + 4) << 32) | __get_una_le32(p);
+}
+
+static inline u16 __get_una_be16(const u8 *p)
+{
+ return p[0] << 8 | p[1];
+}
+
+static inline u32 __get_una_be32(const u8 *p)
+{
+ return p[0] << 24 | p[1] << 16 | p[2] << 8 | p[3];
+}
+
+static inline u64 __get_una_be64(const u8 *p)
+{
+ return ((u64)__get_una_be32(p) << 32) | __get_una_be32(p + 4);
+}
+
+static inline u16 get_unaligned_le16(const void *p)
+{
+#ifdef __LITTLE_ENDIAN
+ return ((const struct __una_u16 *)p)->x;
+#else
+ return __get_una_le16(p);
+#endif
+}
+
+static inline u32 get_unaligned_le32(const void *p)
+{
+#ifdef __LITTLE_ENDIAN
+ return ((const struct __una_u32 *)p)->x;
+#else
+ return __get_una_le32(p);
+#endif
+}
+
+static inline u64 get_unaligned_le64(const void *p)
+{
+#ifdef __LITTLE_ENDIAN
+	return ((const struct __una_u64 *)p)->x;
+#else
+	return __get_una_le64(p);
+#endif
+}
+
+static inline u16 get_unaligned_be16(const void *p)
+{
+#ifdef __BIG_ENDIAN
+ return ((const struct __una_u16 *)p)->x;
+#else
+ return __get_una_be16(p);
+#endif
+}
+
+static inline u32 get_unaligned_be32(const void *p)
+{
+#ifdef __BIG_ENDIAN
+ return ((const struct __una_u32 *)p)->x;
+#else
+ return __get_una_be32(p);
+#endif
+}
+
+static inline u64 get_unaligned_be64(const void *p)
+{
+#ifdef __BIG_ENDIAN
+	return ((const struct __una_u64 *)p)->x;
+#else
+	return __get_una_be64(p);
+#endif
+}
+
+static inline void __put_una_le16(u8 *p, u16 val)
+{
+ *p++ = val;
+ *p++ = val >> 8;
+}
+
+static inline void __put_una_le32(u8 *p, u32 val)
+{
+ __put_una_le16(p + 2, val >> 16);
+ __put_una_le16(p, val);
+}
+
+static inline void __put_una_le64(u8 *p, u64 val)
+{
+ __put_una_le32(p + 4, val >> 32);
+ __put_una_le32(p, val);
+}
+
+static inline void __put_una_be16(u8 *p, u16 val)
+{
+ *p++ = val >> 8;
+ *p++ = val;
+}
+
+static inline void __put_una_be32(u8 *p, u32 val)
+{
+ __put_una_be16(p, val >> 16);
+ __put_una_be16(p + 2, val);
+}
+
+static inline void __put_una_be64(u8 *p, u64 val)
+{
+ __put_una_be32(p, val >> 32);
+ __put_una_be32(p + 4, val);
+}
+
+static inline void put_unaligned_le16(u16 val, void *p)
+{
+#ifdef __LITTLE_ENDIAN
+ ((struct __una_u16 *)p)->x = val;
+#else
+ __put_una_le16(p, val);
+#endif
+}
+
+static inline void put_unaligned_le32(u32 val, void *p)
+{
+#ifdef __LITTLE_ENDIAN
+ ((struct __una_u32 *)p)->x = val;
+#else
+ __put_una_le32(p, val);
+#endif
+}
+
+static inline void put_unaligned_le64(u64 val, void *p)
+{
+#ifdef __LITTLE_ENDIAN
+ ((struct __una_u64 *)p)->x = val;
+#else
+ __put_una_le64(p, val);
+#endif
+}
+
+static inline void put_unaligned_be16(u16 val, void *p)
+{
+#ifdef __BIG_ENDIAN
+ ((struct __una_u16 *)p)->x = val;
+#else
+ __put_una_be16(p, val);
+#endif
+}
+
+static inline void put_unaligned_be32(u32 val, void *p)
+{
+#ifdef __BIG_ENDIAN
+ ((struct __una_u32 *)p)->x = val;
+#else
+ __put_una_be32(p, val);
+#endif
+}
+
+static inline void put_unaligned_be64(u64 val, void *p)
+{
+#ifdef __BIG_ENDIAN
+ ((struct __una_u64 *)p)->x = val;
+#else
+ __put_una_be64(p, val);
+#endif
+}
+
+#endif /* _UNALIGNED_ACCESS_OK */
+
+/*
+ * Cause a link-time error if we try an unaligned access other than
+ * 1,2,4 or 8 bytes long
+ */
+extern void __bad_unaligned_access_size(void);
+
+#define __get_unaligned_le(ptr) ((__force typeof(*(ptr)))({ \
+ __builtin_choose_expr(sizeof(*(ptr)) == 1, *(ptr), \
+ __builtin_choose_expr(sizeof(*(ptr)) == 2, get_unaligned_le16((ptr)), \
+ __builtin_choose_expr(sizeof(*(ptr)) == 4, get_unaligned_le32((ptr)), \
+ __builtin_choose_expr(sizeof(*(ptr)) == 8, get_unaligned_le64((ptr)), \
+ __bad_unaligned_access_size())))); \
+ }))
+
+#define __get_unaligned_be(ptr) ((__force typeof(*(ptr)))({ \
+ __builtin_choose_expr(sizeof(*(ptr)) == 1, *(ptr), \
+ __builtin_choose_expr(sizeof(*(ptr)) == 2, get_unaligned_be16((ptr)), \
+ __builtin_choose_expr(sizeof(*(ptr)) == 4, get_unaligned_be32((ptr)), \
+ __builtin_choose_expr(sizeof(*(ptr)) == 8, get_unaligned_be64((ptr)), \
+ __bad_unaligned_access_size())))); \
+ }))
+
+#define __put_unaligned_le(val, ptr) ({ \
+ void *__gu_p = (ptr); \
+ switch (sizeof(*(ptr))) { \
+ case 1: \
+ *(u8 *)__gu_p = (__force u8)(val); \
+ break; \
+ case 2: \
+ put_unaligned_le16((__force u16)(val), __gu_p); \
+ break; \
+ case 4: \
+ put_unaligned_le32((__force u32)(val), __gu_p); \
+ break; \
+ case 8: \
+ put_unaligned_le64((__force u64)(val), __gu_p); \
+ break; \
+ default: \
+ __bad_unaligned_access_size(); \
+ break; \
+ } \
+ (void)0; })
+
+#define __put_unaligned_be(val, ptr) ({ \
+ void *__gu_p = (ptr); \
+ switch (sizeof(*(ptr))) { \
+ case 1: \
+ *(u8 *)__gu_p = (__force u8)(val); \
+ break; \
+ case 2: \
+ put_unaligned_be16((__force u16)(val), __gu_p); \
+ break; \
+ case 4: \
+ put_unaligned_be32((__force u32)(val), __gu_p); \
+ break; \
+ case 8: \
+ put_unaligned_be64((__force u64)(val), __gu_p); \
+ break; \
+ default: \
+ __bad_unaligned_access_size(); \
+ break; \
+ } \
+ (void)0; })
+
+#ifdef __LITTLE_ENDIAN
+# define get_unaligned __get_unaligned_le
+# define put_unaligned __put_unaligned_le
+#else
+# define get_unaligned __get_unaligned_be
+# define put_unaligned __put_unaligned_be
+#endif
+
+#endif /* _ASM_GENERIC_UNALIGNED_H */
--
1.6.0.3.756.gb776d


2008-11-10 11:49:31

by Will Newton

[permalink] [raw]
Subject: Re: [RFC-PATCH 1/5] unaligned: introduce common header

On Mon, Nov 10, 2008 at 4:22 AM, Harvey Harrison
<[email protected]> wrote:

(add back lkml cc that I mistakenly dropped)

> On Sat, 2008-11-08 at 12:47 +0000, Will Newton wrote:
>> On Wed, Nov 5, 2008 at 6:16 PM, Harvey Harrison
>> <[email protected]> wrote:
>>
>> > The memmove-based arches (m32r, xtensa, h8300) are likely going to be fine with this change
>> > barring compiler bugs that made them go with memmove in the first place.
>>
>> As I understand it the need for the memmove implementation is not
>> compiler bugs but default struct alignment. The packed struct
>> implementation will only work with compilers where structs can be
>> aligned on byte boundaries, it's fairly common for RISC architectures
>> to align structs to 4 or 8 byte boundaries.
>
> Which I believe is disabled entirely using __attribute__((packed)), no?

As far as I am aware the packed attribute is handled in this way for
some toolchains (arm in particular). Not everybody does it, and for
good reasons. For example if I have this struct on an architecture
with 8 byte default struct alignment:

struct foo {
u64 big_data;
u8 small_data;
u32 medium_data;
} __attribute__((packed));

Should big_data be accessed as eight single-byte load instructions rather than
one 64-bit load instruction? It's a pretty large performance penalty to
pay when all I really want is for medium_data to be accessed
correctly.

2008-11-10 16:51:43

by Harvey Harrison

[permalink] [raw]
Subject: Re: [RFC-PATCH 1/5] unaligned: introduce common header

On Mon, 2008-11-10 at 11:49 +0000, Will Newton wrote:
> On Mon, Nov 10, 2008 at 4:22 AM, Harvey Harrison
> <[email protected]> wrote:
>
> (add back lkml cc that I mistakenly dropped)
>
> > On Sat, 2008-11-08 at 12:47 +0000, Will Newton wrote:
> >> On Wed, Nov 5, 2008 at 6:16 PM, Harvey Harrison
> >> <[email protected]> wrote:
> >>
> >> > The memmove-based arches (m32r, xtensa, h8300) are likely going to be fine with this change
> >> > barring compiler bugs that made them go with memmove in the first place.
> >>
> >> As I understand it the need for the memmove implementation is not
> >> compiler bugs but default struct alignment. The packed struct
> >> implementation will only work with compilers where structs can be
> >> aligned on byte boundaries, it's fairly common for RISC architectures
> >> to align structs to 4 or 8 byte boundaries.
> >
> > Which I believe is disabled entirely using __attribute__((packed)), no?
>
> As far as I am aware the packed attribute is handled in this way for
> some toolchains (arm in particular). Not everybody does it, and for
> good reasons. For example if I have this struct on an architecture
> with 8 byte default struct alignment:
>

I should have been more careful with my wording here, I meant that no
alignment assumptions are made when accessing a packed struct through
a pointer, as is the case with the kernel version.

> struct foo {
> u64 big_data;
> u8 small_data;
> u32 medium_data;
> } __attribute__((packed));
>
> Should big_data be accessed as 8 byte load instructions rather than
> one 64bit load instruction? It's a pretty large performance penalty to
> pay when all I really want is for medium_data to be accessed
> correctly.

In this particular case, packed isn't right as you know big_data is
aligned (as long as you can guarantee the struct alignment), so you'd
probably want:

struct foo {
u64 big_data;
u8 small_data;
u32 medium_data __attribute__((__packed__));
}

But that's not what we're talking about in the kernel's case.

Harvey

2008-11-10 18:35:31

by Will Newton

[permalink] [raw]
Subject: Re: [RFC-PATCH 1/5] unaligned: introduce common header

On Mon, Nov 10, 2008 at 4:51 PM, Harvey Harrison
<[email protected]> wrote:
> On Mon, 2008-11-10 at 11:49 +0000, Will Newton wrote:
>> On Mon, Nov 10, 2008 at 4:22 AM, Harvey Harrison
>> <[email protected]> wrote:
>>
>> (add back lkml cc that I mistakenly dropped)
>>
>> > On Sat, 2008-11-08 at 12:47 +0000, Will Newton wrote:
>> >> On Wed, Nov 5, 2008 at 6:16 PM, Harvey Harrison
>> >> <[email protected]> wrote:
>> >>
>> >> > The memmove-based arches (m32r, xtensa, h8300) are likely going to be fine with this change
>> >> > barring compiler bugs that made them go with memmove in the first place.
>> >>
>> >> As I understand it the need for the memmove implementation is not
>> >> compiler bugs but default struct alignment. The packed struct
>> >> implementation will only work with compilers where structs can be
>> >> aligned on byte boundaries, it's fairly common for RISC architectures
>> >> to align structs to 4 or 8 byte boundaries.
>> >
>> > Which I believe is disabled entirely using __attribute__((packed)), no?
>>
>> As far as I am aware the packed attribute is handled in this way for
>> some toolchains (arm in particular). Not everybody does it, and for
>> good reasons. For example if I have this struct on an architecture
>> with 8 byte default struct alignment:
>>
>
> I should have been more careful with my wording here, I meant that no
> alignment assumptions are made when accessing a packed struct through
> a pointer, as is the case with the kernel version.

I am not aware that this is the case. Do you have any references?

>> struct foo {
>> u64 big_data;
>> u8 small_data;
>> u32 medium_data;
>> } __attribute__((packed));
>>
>> Should big_data be accessed as 8 byte load instructions rather than
>> one 64bit load instruction? It's a pretty large performance penalty to
>> pay when all I really want is for medium_data to be accessed
>> correctly.
>
> In this particular case, packed isn't right as you know big_data is
> aligned (as long as you can guarantee the struct alignment), so you'd
> probably want:
>
> struct foo {
> u64 big_data;
> u8 small_data;
> u32 medium_data __attribute__((__packed__));
> }
>
> But that's not what we're talking about in the kernel's case.

Perhaps that would be a neater way of expressing what is required in
my simple example, but it's fairly common to use packed on the whole
struct which could be because a field that is "packed" by default on
one architecture might not be on another. You could mark every field
as packed but few people seem to do that and as far as I am aware
there is no documented difference between packing all members and the
whole struct. The gcc documentation for packed is pretty short:

The packed attribute specifies that a variable or structure field
should have the smallest
possible alignment—one byte for a variable, and one bit for a field,
unless you specify a
larger value with the aligned attribute.

I'd love to know if the pointer alignment behaviour is widespread and
then maybe write a patch for the gcc manual.

2008-11-10 18:52:58

by Harvey Harrison

[permalink] [raw]
Subject: Re: [RFC-PATCH 1/5] unaligned: introduce common header

On Mon, 2008-11-10 at 18:35 +0000, Will Newton wrote:
> On Mon, Nov 10, 2008 at 4:51 PM, Harvey Harrison
> <[email protected]> wrote:
> >
> > In this particular case, packed isn't right as you know big_data is
> > aligned (as long as you can guarantee the struct alignment), so you'd
> > probably want:
> >
> > struct foo {
> > u64 big_data;
> > u8 small_data;
> > u32 medium_data __attribute__((__packed__));
> > }
> >
> > But that's not what we're talking about in the kernel's case.
>
> Perhaps that would be a neater way of expressing what is required in
> my simple example, but it's fairly common to use packed on the whole
> struct which could be because a field that is "packed" by default on
> one architecture might not be on another. You could mark every field
> as packed but few people seem to do that and as far as I am aware
> there is no documented difference between packing all members and the
> whole struct. The gcc documentation for packed is pretty short:

Actually it's documented that putting attribute(packed) on the struct
is equivalent to putting attribute(packed) on _every_ member of the
struct.

> The packed attribute specifies that a variable or structure field
> should have the smallest
> possible alignment—one byte for a variable, and one bit for a field,
> unless you specify a
> larger value with the aligned attribute.
>
> I'd love to know if the pointer alignment behaviour is widespread and
> then maybe write a patch for the gcc manual.

Well, it's kind of the whole point of __packed isn't it? Otherwise the
struct members get naturally (or some arch-dependent value) aligned,
which the compiler can rely on unless you say __packed.

So in my example above, the compiler _knows_ how it has aligned
big_data and small_data and can use whatever access is most efficient,
but it can't make any assumptions about medium_data, so access through
a pointer _must_ be done unaligned.

struct foo *bar;
bar->medium_data; // compiler must do this unaligned

Harvey

2008-11-11 09:51:14

by Will Newton

[permalink] [raw]
Subject: Re: [RFC-PATCH 1/5] unaligned: introduce common header

On Mon, Nov 10, 2008 at 6:51 PM, Harvey Harrison
<[email protected]> wrote:
> On Mon, 2008-11-10 at 18:35 +0000, Will Newton wrote:
>> On Mon, Nov 10, 2008 at 4:51 PM, Harvey Harrison
>> <[email protected]> wrote:
>> >
>> > In this particular case, packed isn't right as you know big_data is
>> > aligned (as long as you can guarantee the struct alignment), so you'd
>> > probably want:
>> >
>> > struct foo {
>> > u64 big_data;
>> > u8 small_data;
>> > u32 medium_data __attribute__((__packed__));
>> > }
>> >
>> > But that's not what we're talking about in the kernel's case.
>>
>> Perhaps that would be a neater way of expressing what is required in
>> my simple example, but it's fairly common to use packed on the whole
>> struct which could be because a field that is "packed" by default on
>> one architecture might not be on another. You could mark every field
>> as packed but few people seem to do that and as far as I am aware
>> there is no documented difference between packing all members and the
>> whole struct. The gcc documentation for packed is pretty short:
>
> Actually it's documented that putting attribute(packed) on the struct
> is equivalent to putting attribute(packed) on _every_ member of the
> struct.
>
>> The packed attribute specifies that a variable or structure field
>> should have the smallest
>> possible alignment—one byte for a variable, and one bit for a field,
>> unless you specify a
>> larger value with the aligned attribute.
>>
>> I'd love to know if the pointer alignment behaviour is widespread and
>> then maybe write a patch for the gcc manual.
>
> Well, it's kind of the whole point of __packed isn't it? Otherwise the
> struct members get naturally (or some arch-dependent value) aligned,
> which the compiler can rely on unless you say __packed.
>
> So in my example above, the compiler _knows_ how it has aligned
> big_data and small_data and can use whatever access is most efficient,
> but it can't make any assumptions about medium_data, so access through
> a pointer _must_ be done unaligned.
>
> struct foo *bar;
> bar->medium_data; // compiler must do this unaligned

Agreed, but the packed struct unaligned access code does this:

struct __una_u16 { u16 x __attribute__((packed)); };

On an architecture with struct alignment > 1 this will not work. Which
is why I believe that the memmove code must stay for those
architectures.