As pointed out by Arnd Bergmann, in include/linux/usb/ehci_def.h, struct
ehci_caps is defined with __attribute__((packed)) for no good reason,
and this triggers undefined behaviour when using ARM's readl() on
pointers to elements of this structure:
http://lkml.kernel.org/r/[email protected]
Cc: Arnd Bergmann <[email protected]>
Signed-off-by: Rabin Vincent <[email protected]>
---
include/linux/usb/ehci_def.h | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/include/linux/usb/ehci_def.h b/include/linux/usb/ehci_def.h
index e49dfd4..7879943 100644
--- a/include/linux/usb/ehci_def.h
+++ b/include/linux/usb/ehci_def.h
@@ -52,7 +52,7 @@ struct ehci_caps {
#define HCC_PGM_FRAMELISTLEN(p) ((p)&(1 << 1)) /* true: periodic_size changes*/
#define HCC_64BIT_ADDR(p) ((p)&(1)) /* true: can use 64-bit addr */
u8 portroute[8]; /* nibbles for routing - offset 0xC */
-} __attribute__ ((packed));
+};
/* Section 2.3 Host Controller Operational Registers */
@@ -150,7 +150,7 @@ struct ehci_regs {
#define PORT_CSC (1<<1) /* connect status change */
#define PORT_CONNECT (1<<0) /* device connected */
#define PORT_RWC_BITS (PORT_CSC | PORT_PEC | PORT_OCC)
-} __attribute__ ((packed));
+};
#define USBMODE 0x68 /* USB Device mode */
#define USBMODE_SDIS (1<<3) /* Stream disable */
@@ -194,7 +194,7 @@ struct ehci_dbg_port {
u32 data47;
u32 address;
#define DBGP_EPADDR(dev, ep) (((dev)<<8)|(ep))
-} __attribute__ ((packed));
+};
#ifdef CONFIG_EARLY_PRINTK_DBGP
#include <linux/init.h>
--
1.7.4.1
Hello.
Rabin Vincent wrote:
> As pointed out by Arnd Bergmann, in include/linux/usb/ehci_def.h, struct
> ehci_caps is defined with __attribute__((packed)) for no good reason,
You're talking only of one structure here, yet you're changing several...
> and this triggers undefined behaviour when using ARM's readl() on
> pointers to elements of this structure:
> http://lkml.kernel.org/r/[email protected]
> Cc: Arnd Bergmann <[email protected]>
> Signed-off-by: Rabin Vincent <[email protected]>
> ---
> include/linux/usb/ehci_def.h | 6 +++---
> 1 files changed, 3 insertions(+), 3 deletions(-)
> diff --git a/include/linux/usb/ehci_def.h b/include/linux/usb/ehci_def.h
> index e49dfd4..7879943 100644
> --- a/include/linux/usb/ehci_def.h
> +++ b/include/linux/usb/ehci_def.h
> @@ -52,7 +52,7 @@ struct ehci_caps {
> #define HCC_PGM_FRAMELISTLEN(p) ((p)&(1 << 1)) /* true: periodic_size changes*/
> #define HCC_64BIT_ADDR(p) ((p)&(1)) /* true: can use 64-bit addr */
> u8 portroute[8]; /* nibbles for routing - offset 0xC */
> -} __attribute__ ((packed));
> +};
>
>
> /* Section 2.3 Host Controller Operational Registers */
> @@ -150,7 +150,7 @@ struct ehci_regs {
> #define PORT_CSC (1<<1) /* connect status change */
> #define PORT_CONNECT (1<<0) /* device connected */
> #define PORT_RWC_BITS (PORT_CSC | PORT_PEC | PORT_OCC)
> -} __attribute__ ((packed));
> +};
>
> #define USBMODE 0x68 /* USB Device mode */
> #define USBMODE_SDIS (1<<3) /* Stream disable */
> @@ -194,7 +194,7 @@ struct ehci_dbg_port {
> u32 data47;
> u32 address;
> #define DBGP_EPADDR(dev, ep) (((dev)<<8)|(ep))
> -} __attribute__ ((packed));
> +};
>
> #ifdef CONFIG_EARLY_PRINTK_DBGP
> #include <linux/init.h>
WBR, Sergei
As pointed out by Arnd Bergmann, in include/linux/usb/ehci_def.h, struct
ehci_caps is defined with __attribute__((packed)) for no good reason,
and this triggers undefined behaviour when using ARM's readl() on
pointers to elements of this structure:
http://lkml.kernel.org/r/[email protected]
The same problem exists with the other two structures in ehci_def.h too,
so remove the __attribute__((packed)) from all of them.
Cc: Arnd Bergmann <[email protected]>
Signed-off-by: Rabin Vincent <[email protected]>
---
v2: clarify that the problem is not just with struct ehci_caps
include/linux/usb/ehci_def.h | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/include/linux/usb/ehci_def.h b/include/linux/usb/ehci_def.h
index e49dfd4..7879943 100644
--- a/include/linux/usb/ehci_def.h
+++ b/include/linux/usb/ehci_def.h
@@ -52,7 +52,7 @@ struct ehci_caps {
#define HCC_PGM_FRAMELISTLEN(p) ((p)&(1 << 1)) /* true: periodic_size changes*/
#define HCC_64BIT_ADDR(p) ((p)&(1)) /* true: can use 64-bit addr */
u8 portroute[8]; /* nibbles for routing - offset 0xC */
-} __attribute__ ((packed));
+};
/* Section 2.3 Host Controller Operational Registers */
@@ -150,7 +150,7 @@ struct ehci_regs {
#define PORT_CSC (1<<1) /* connect status change */
#define PORT_CONNECT (1<<0) /* device connected */
#define PORT_RWC_BITS (PORT_CSC | PORT_PEC | PORT_OCC)
-} __attribute__ ((packed));
+};
#define USBMODE 0x68 /* USB Device mode */
#define USBMODE_SDIS (1<<3) /* Stream disable */
@@ -194,7 +194,7 @@ struct ehci_dbg_port {
u32 data47;
u32 address;
#define DBGP_EPADDR(dev, ep) (((dev)<<8)|(ep))
-} __attribute__ ((packed));
+};
#ifdef CONFIG_EARLY_PRINTK_DBGP
#include <linux/init.h>
--
1.7.4.1
In commit 139540170d9d9b7ead3caaf540f161756b356d56 the attribute
packed is removed from the structs which are used to access the EHCI-registers.
This is done to circumvent a problem with gcc 4.6, which might access members of
packed structs on a byte by byte basis. But using packed, aligned(4) fixes that
too and is imho the better solution. Otherwise (without packed) the compiler would be free
to choose whatever alignment he thinks fits best, which might be e.g. 8-byte on 64-bit machines.
Signed-off-by: Alexander Holler <[email protected]>
---
include/linux/usb/ehci_def.h | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/include/linux/usb/ehci_def.h b/include/linux/usb/ehci_def.h
index 7cc95ee..15c87a9 100644
--- a/include/linux/usb/ehci_def.h
+++ b/include/linux/usb/ehci_def.h
@@ -57,7 +57,7 @@ struct ehci_caps {
#define HCC_PGM_FRAMELISTLEN(p) ((p)&(1 << 1)) /* true: periodic_size changes*/
#define HCC_64BIT_ADDR(p) ((p)&(1)) /* true: can use 64-bit addr */
u8 portroute[8]; /* nibbles for routing - offset 0xC */
-};
+} __attribute__ ((packed, aligned(4)));
/* Section 2.3 Host Controller Operational Registers */
@@ -155,7 +155,7 @@ struct ehci_regs {
#define PORT_CSC (1<<1) /* connect status change */
#define PORT_CONNECT (1<<0) /* device connected */
#define PORT_RWC_BITS (PORT_CSC | PORT_PEC | PORT_OCC)
-};
+} __attribute__ ((packed, aligned(4)));
#define USBMODE 0x68 /* USB Device mode */
#define USBMODE_SDIS (1<<3) /* Stream disable */
@@ -199,7 +199,7 @@ struct ehci_dbg_port {
u32 data47;
u32 address;
#define DBGP_EPADDR(dev, ep) (((dev)<<8)|(ep))
-};
+} __attribute__ ((packed, aligned(4)));
#ifdef CONFIG_EARLY_PRINTK_DBGP
#include <linux/init.h>
--
1.7.3.4
On Thu, 16 Jun 2011, Alexander Holler wrote:
> In commit 139540170d9d9b7ead3caaf540f161756b356d56 the attribute
> packed is removed from the structs which are used to access the EHCI-registers.
>
> This is done to circumvent a problem with gcc 4.6, which might access members of
> packed structs on a byte by byte basis. But using packed, aligned(4) fixes that
> too and is imho the better solution. Otherwise (without packed) the compiler would be free
> to choose whatever alignment he thinks fits best, which might be e.g. 8-byte on 64-bit machines.
Is that really true? I thought the compiler was not allowed to insert
padding if the natural alignment of the data types didn't require any.
Alan Stern
On Thursday 16 June 2011, Alan Stern wrote:
>
> On Thu, 16 Jun 2011, Alexander Holler wrote:
>
> > In commit 139540170d9d9b7ead3caaf540f161756b356d56 the attribute
> > packed is removed from the structs which are used to access the EHCI-registers.
> >
> > This is done to circumvent a problem with gcc 4.6, which might access members of
> > packed structs on a byte by byte basis. But using packed, aligned(4) fixes that
> > too and is imho the better solution. Otherwise (without packed) the compiler would be free
> > to choose whatever alignment he thinks fits best, which might be e.g. 8-byte on 64-bit machines.
>
> Is that really true?
No.
> I thought the compiler was not allowed to insert
> padding if the natural alignment of the data types didn't require any.
It's architecture dependent. The alignment of the structure is the maximum alignment
of its members, so it gets to be 8 bytes if there is a 64 bit member in the struct
on most architectures, but 4 bytes on x86.
Arnd
Am 16.06.2011 19:09, schrieb Alan Stern:
> On Thu, 16 Jun 2011, Alexander Holler wrote:
>
>> In commit 139540170d9d9b7ead3caaf540f161756b356d56 the attribute
>> packed is removed from the structs which are used to access the EHCI-registers.
>>
>> This is done to circumvent a problem with gcc 4.6, which might access members of
>> packed structs on a byte by byte basis. But using packed, aligned(4) fixes that
>> too and is imho the better solution. Otherwise (without packed) the compiler would be free
>> to choose whatever alignment he thinks fits best, which might be e.g. 8-byte on 64-bit machines.
>
> Is that really true? I thought the compiler was not allowed to insert
> padding if the natural alignment of the data types didn't require any.
>
> Alan Stern
I wasn't sure and have searched c99 before posting the patch but I
haven't found something which states what you are suggesting. Maybe I
was too stupid to find it, I've searched for the words "alignment" and
"padding".
The only statement I've found was
"There may be unnamed padding within a structure object, but not at its
beginning."
in 6.7.2.1 13 and
"There may be unnamed padding at the end of a structure or union."
in 6.7.2.1. 15 in my copy of c99.
Regards,
Alexander
Am 16.06.2011 19:55, schrieb Arnd Bergmann:
> On Thursday 16 June 2011, Alan Stern wrote:
>>
>> On Thu, 16 Jun 2011, Alexander Holler wrote:
>>
>>> In commit 139540170d9d9b7ead3caaf540f161756b356d56 the attribute
>>> packed is removed from the structs which are used to access the EHCI-registers.
>>>
>>> This is done to circumvent a problem with gcc 4.6, which might access members of
>>> packed structs on a byte by byte basis. But using packed, aligned(4) fixes that
>>> too and is imho the better solution. Otherwise (without packed) the compiler would be free
>>> to choose whatever alignment he thinks fits best, which might be e.g. 8-byte on 64-bit machines.
>>
>> Is that really true?
>
> No.
>
>> I thought the compiler was not allowed to insert
>> padding if the natural alignment of the data types didn't require any.
>
> It's architecture dependent. The alignment of the structure is the maximum alignment
> of its members, so it gets to be 8 bytes if there is a 64 bit member in the struct
> on most architectures, but 4 bytes on x86.
Hmm, sorry, but that sentence just says something about the alignment of
the structure itself and nothing about the alignment of it's members or
do I understand something wrong?
I've had a look at c99 again, and in addition to the two points in c99 I
mentioned in the mail before (6.7.2.1 13 and 6.7.2.1. 15), I've only
found the following on that topic:
6.7.2.1 12 Each non-bit-ļ¬eld member of a structure or union object is
aligned in an implementationdeļ¬ned manner appropriate to its type.
And, under "J.1 Unspecified behaviour":
Many aspects of the representations of types (6.2.6).
I even haven't found anything which says something about the alignment
of a structure itself. But I'm no compiler expert and I look only seldom
at c99 and usually try to avoid such aspects as the one we are talking
about. ;)
For me that means that I understand that when packed(,aligned(4)) is
used, it's pretty sure, that there is no padding inbetween the members
of e.g. struct ehci_regs. But without I'm unsure, so I would avoid that.
That aligned(4) is necessary (for ARM) is only a workaround because of
the implementation of readl(), at least that is how I understood the
discussion. But that is discussed elsewhere and don't want to take part
in that discussion (and can't).
Regards,
Alexander
On Thu, 16 Jun 2011, Alexander Holler wrote:
> >> I thought the compiler was not allowed to insert
> >> padding if the natural alignment of the data types didn't require any.
> >
> > It's architecture dependent. The alignment of the structure is the maximum alignment
> > of its members, so it gets to be 8 bytes if there is a 64 bit member in the struct
> > on most architectures, but 4 bytes on x86.
>
> Hmm, sorry, but that sentence just says something about the alignment of
> the structure itself and nothing about the alignment of it's members or
> do I understand something wrong?
We're talking about padding, not alignment. Obviously these two
concepts are related, since fields with differing alignment
requirements sometimes force the compiler to insert padding. But they
aren't the same thing.
The question is whether gcc will insert padding in a structure that
doesn't need it. The kernel depends on peculiarities of gcc in many
ways; basing your strategy on what the C99 spec says is not always a
good idea.
> For me that means that I understand that when packed(,aligned(4)) is
> used, it's pretty sure, that there is no padding inbetween the members
> of e.g. struct ehci_regs. But without I'm unsure, so I would avoid that.
>
> That aligned(4) is necessary (for ARM) is only a workaround because of
> the implementation of readl(), at least that is how I understood the
> discussion. But that is discussed elsewhere and don't want to take part
> in that discussion (and can't).
Hmmm. I won't say that ((packed,aligned(4))) is wrong. But it's not
clearly necessary either.
Alan Stern
Am 16.06.2011 21:46, schrieb Alan Stern:
> On Thu, 16 Jun 2011, Alexander Holler wrote:
>
>>>> I thought the compiler was not allowed to insert
>>>> padding if the natural alignment of the data types didn't require any.
>>>
>>> It's architecture dependent. The alignment of the structure is the maximum alignment
>>> of its members, so it gets to be 8 bytes if there is a 64 bit member in the struct
>>> on most architectures, but 4 bytes on x86.
>>
>> Hmm, sorry, but that sentence just says something about the alignment of
>> the structure itself and nothing about the alignment of it's members or
>> do I understand something wrong?
>
> We're talking about padding, not alignment. Obviously these two
> concepts are related, since fields with differing alignment
> requirements sometimes force the compiler to insert padding. But they
> aren't the same thing.
>
> The question is whether gcc will insert padding in a structure that
> doesn't need it. The kernel depends on peculiarities of gcc in many
> ways; basing your strategy on what the C99 spec says is not always a
> good idea.
>
>> For me that means that I understand that when packed(,aligned(4)) is
>> used, it's pretty sure, that there is no padding inbetween the members
>> of e.g. struct ehci_regs. But without I'm unsure, so I would avoid that.
>>
>> That aligned(4) is necessary (for ARM) is only a workaround because of
>> the implementation of readl(), at least that is how I understood the
>> discussion. But that is discussed elsewhere and don't want to take part
>> in that discussion (and can't).
>
> Hmmm. I won't say that ((packed,aligned(4))) is wrong. But it's not
> clearly necessary either.
Maybe I should have added a "cosmetic:" in front of the subject of the
patch. ;)
Using packed doesn't seem to be necessary (at least not with those
versions of gcc I'm using here), I've tried both versions (on arm,
without packed and with packed, aligned(4)) and both are working. I've
only posted the patch because I found the usage of packed, aligned(4)
much clearer than without packed. And It might help avoiding such
discussions like this with people like me who aren't that deep involved
in gcc-specific implementation details. ;)
Anyway, feel free to nack that patch. I don't really care and just
thought I should post it (e.g. as an alternative to removing that packed).
Regards,
Alexander
On Thursday 16 June 2011 22:10:53 Alexander Holler wrote:
> Using packed doesn't seem to be necessary (at least not with those
> versions of gcc I'm using here), I've tried both versions (on arm,
> without packed and with packed, aligned(4)) and both are working. I've
> only posted the patch because I found the usage of packed, aligned(4)
> much clearer than without packed. And It might help avoiding such
> discussions like this with people like me who aren't that deep involved
> in gcc-specific implementation details. ;)
>
> Anyway, feel free to nack that patch. I don't really care and just
> thought I should post it (e.g. as an alternative to removing that packed).
At least I would be happier without the patch. I'm trying to convince
people to not use these attributes unless required because too much
harm is done when they are used without understanding the full
consequences. I also recommend using __packed as localized as possible,
i.e. set it for the members that need it, not the entire struct.
I agree that your patch is harmless, it's just the opposite of
a cleanup in my opinion.
Arnd
On Thu, 16 Jun 2011, Alexander Holler wrote:
> > Hmmm. I won't say that ((packed,aligned(4))) is wrong. But it's not
> > clearly necessary either.
>
> Maybe I should have added a "cosmetic:" in front of the subject of the
> patch. ;)
>
> Using packed doesn't seem to be necessary (at least not with those
> versions of gcc I'm using here), I've tried both versions (on arm,
> without packed and with packed, aligned(4)) and both are working. I've
> only posted the patch because I found the usage of packed, aligned(4)
> much clearer than without packed. And It might help avoiding such
> discussions like this with people like me who aren't that deep involved
> in gcc-specific implementation details. ;)
>
> Anyway, feel free to nack that patch. I don't really care and just
> thought I should post it (e.g. as an alternative to removing that packed).
There are other structures that need to avoid padding but aren't marked
((packed)), such as ehci_qtd and ehci_qh_hw in drivers/usb/host/ehci.h.
I don't see why those in ehci_def.h should get special treatment.
Alan Stern
On Thu, 16 Jun 2011, Arnd Bergmann wrote:
> On Thursday 16 June 2011 22:10:53 Alexander Holler wrote:
> > Using packed doesn't seem to be necessary (at least not with those
> > versions of gcc I'm using here), I've tried both versions (on arm,
> > without packed and with packed, aligned(4)) and both are working. I've
> > only posted the patch because I found the usage of packed, aligned(4)
> > much clearer than without packed. And It might help avoiding such
> > discussions like this with people like me who aren't that deep involved
> > in gcc-specific implementation details. ;)
> >
> > Anyway, feel free to nack that patch. I don't really care and just
> > thought I should post it (e.g. as an alternative to removing that packed).
>
> At least I would be happier without the patch. I'm trying to convince
> people to not use these attributes unless required because too much
> harm is done when they are used without understanding the full
> consequences. I also recommend using __packed as localized as possible,
> i.e. set it for the members that need it, not the entire struct.
>
> I agree that your patch is harmless, it's just the opposite of
> a cleanup in my opinion.
The question is: does the structure really has to be packed?
If it does, then the follow-up question is: is a packing on word
boundaries sufficient?
If the answer is yes in both cases, then having packed,aligned(4) is not
a frivolity but rather a correctness issue. We can of course provide a
define in include/linux/compiler-gcc.hto hide the ugliness of it
somewhat:
#define __packed_32 __attribute__((packed,aligned(4)))
I suspect that the vast majority of the __packed uses in the kernel
would be better with this __packed_32 instead, the actual need and
intent would be more clearly expressed, and the generated code in the
presence of those GCC changes would then be way more efficient and still
correct.
Nicolas
On Sun, 19 Jun 2011, Nicolas Pitre wrote:
> On Thu, 16 Jun 2011, Arnd Bergmann wrote:
>
> > On Thursday 16 June 2011 22:10:53 Alexander Holler wrote:
> > > Using packed doesn't seem to be necessary (at least not with those
> > > versions of gcc I'm using here), I've tried both versions (on arm,
> > > without packed and with packed, aligned(4)) and both are working. I've
> > > only posted the patch because I found the usage of packed, aligned(4)
> > > much clearer than without packed. And It might help avoiding such
> > > discussions like this with people like me who aren't that deep involved
> > > in gcc-specific implementation details. ;)
> > >
> > > Anyway, feel free to nack that patch. I don't really care and just
> > > thought I should post it (e.g. as an alternative to removing that packed).
> >
> > At least I would be happier without the patch. I'm trying to convince
> > people to not use these attributes unless required because too much
> > harm is done when they are used without understanding the full
> > consequences. I also recommend using __packed as localized as possible,
> > i.e. set it for the members that need it, not the entire struct.
> >
> > I agree that your patch is harmless, it's just the opposite of
> > a cleanup in my opinion.
>
> The question is: does the structure really has to be packed?
What do you mean? The structure really does need to be allocated
without padding between the fields; is that the same thing? So do a
bunch of other structures that currently have no annotations at all.
> If it does, then the follow-up question is: is a packing on word
> boundaries sufficient?
Again, what do you mean? The structure contains some 32-bit fields and
it must always be allocated at a 4-byte boundary. However there's
nothing wrong with stricter allocation -- I don't recall the details
but it's entirely possible that some of the fields could be 64 bits on
some architectures, in which cases the alignment certainly should be
8-byte.
> If the answer is yes in both cases, then having packed,aligned(4) is not
> a frivolity but rather a correctness issue.
Why so? Current systems work just fine without it.
> We can of course provide a
> define in include/linux/compiler-gcc.hto hide the ugliness of it
> somewhat:
>
> #define __packed_32 __attribute__((packed,aligned(4)))
>
> I suspect that the vast majority of the __packed uses in the kernel
> would be better with this __packed_32 instead, the actual need and
> intent would be more clearly expressed, and the generated code in the
> presence of those GCC changes would then be way more efficient and still
> correct.
What if the intent is that the structure should be 4-byte aligned on
32-bit systems and 8-byte aligned on 64-bit systems? The compiler
already does this sort of thing automatically, why mess with it?
Alan Stern
On Sunday 19 June 2011 21:00:01 Alan Stern wrote:
> On Sun, 19 Jun 2011, Nicolas Pitre wrote:
> > On Thu, 16 Jun 2011, Arnd Bergmann wrote:
> > > On Thursday 16 June 2011 22:10:53 Alexander Holler wrote:
> > > At least I would be happier without the patch. I'm trying to convince
> > > people to not use these attributes unless required because too much
> > > harm is done when they are used without understanding the full
> > > consequences. I also recommend using __packed as localized as possible,
> > > i.e. set it for the members that need it, not the entire struct.
> > >
> > > I agree that your patch is harmless, it's just the opposite of
> > > a cleanup in my opinion.
> >
> > The question is: does the structure really has to be packed?
>
> What do you mean? The structure really does need to be allocated
> without padding between the fields; is that the same thing? So do a
> bunch of other structures that currently have no annotations at all.
I guess the issue is that some ABIs actually require a minimum alignment,
like the old ARM ABI that you can still use to build the kernel.
If a structure is not a multiple of four bytes in size, that ABI
will add padding at the end, e.g. in
struct s {
char c[2];
};
struct t {
struct s t1;
unsigned short t2[3];
};
On most architectures, struct s will be two bytes in size and one byte
aligned, while struct t is eight bytes and two byte aligned.
On ARM oABI, struct s ends up with four byte size and alignment while
struct t is twelve bytes long. All this is ok for regular structures,
but not when they are used to describe memory layout of hardware
registers on on-wire packets.
> > If it does, then the follow-up question is: is a packing on word
> > boundaries sufficient?
>
> > If the answer is yes in both cases, then having packed,aligned(4) is not
> > a frivolity but rather a correctness issue.
>
> Why so? Current systems work just fine without it.
I think Nicolas got it backwards here, adding both packed and
aligned(4) would make a structure like the one above consistently
incorrect when used to describe a tightly packed hardware structure.
In this case, we would have to do
struct s {
char c[2];
} __packed;
struct t {
struct s t1;
unsigned short t2[3] __aligned(2);
} __packed;
To tell the compiler that t2 is indeed aligned, while struct t
is packed to include no padding around t.
I actually recently stumbled over code that gets this wrong,
see
http://git.kernel.org/?p=linux/kernel/git/arnd/playground.git;a=commit;h=284cef173aafd531a708f48e71a9cc7249fc8a98
> > We can of course provide a
> > define in include/linux/compiler-gcc.hto hide the ugliness of it
> > somewhat:
> >
> > #define __packed_32 __attribute__((packed,aligned(4)))
> >
> > I suspect that the vast majority of the __packed uses in the kernel
> > would be better with this __packed_32 instead, the actual need and
> > intent would be more clearly expressed, and the generated code in the
> > presence of those GCC changes would then be way more efficient and still
> > correct.
>
> What if the intent is that the structure should be 4-byte aligned on
> 32-bit systems and 8-byte aligned on 64-bit systems? The compiler
> already does this sort of thing automatically, why mess with it?
Different issue.
Arnd
On Sunday 19 June 2011 22:02:23 Arnd Bergmann wrote:
> In this case, we would have to do
>
> struct s {
> char c[2];
> } __packed;
>
> struct t {
> struct s t1;
> unsigned short t2[3] __aligned(2);
> } __packed;
>
> To tell the compiler that t2 is indeed aligned, while struct t
> is packed to include no padding around t.
>
> I actually recently stumbled over code that gets this wrong,
> see
>
> http://git.kernel.org/?p=linux/kernel/git/arnd/playground.git;a=commit;h=284cef173aafd531a708f48e71a9cc7249fc8a98
>
Just to be clear: none of the ehci structures fall into this category.
I would assume that we actually have a few more drivers with this
bug in the kernel, but the patch proposed by Alexander would not
help with this problem, and the EHCI driver is still correct without
__packed.
Arnd
On Sun, 19 Jun 2011, Alan Stern wrote:
> On Sun, 19 Jun 2011, Nicolas Pitre wrote:
>
> > On Thu, 16 Jun 2011, Arnd Bergmann wrote:
> >
> > > On Thursday 16 June 2011 22:10:53 Alexander Holler wrote:
> > > > Using packed doesn't seem to be necessary (at least not with those
> > > > versions of gcc I'm using here), I've tried both versions (on arm,
> > > > without packed and with packed, aligned(4)) and both are working. I've
> > > > only posted the patch because I found the usage of packed, aligned(4)
> > > > much clearer than without packed. And It might help avoiding such
> > > > discussions like this with people like me who aren't that deep involved
> > > > in gcc-specific implementation details. ;)
> > > >
> > > > Anyway, feel free to nack that patch. I don't really care and just
> > > > thought I should post it (e.g. as an alternative to removing that packed).
> > >
> > > At least I would be happier without the patch. I'm trying to convince
> > > people to not use these attributes unless required because too much
> > > harm is done when they are used without understanding the full
> > > consequences. I also recommend using __packed as localized as possible,
> > > i.e. set it for the members that need it, not the entire struct.
> > >
> > > I agree that your patch is harmless, it's just the opposite of
> > > a cleanup in my opinion.
> >
> > The question is: does the structure really has to be packed?
>
> What do you mean? The structure really does need to be allocated
> without padding between the fields; is that the same thing? So do a
> bunch of other structures that currently have no annotations at all.
Yes, that's the same thing. The packed attribute tells the compiler
that you don't want it to insert padding in it as it sees fit.
> > If it does, then the follow-up question is: is a packing on word
> > boundaries sufficient?
>
> Again, what do you mean? The structure contains some 32-bit fields and
> it must always be allocated at a 4-byte boundary. However there's
> nothing wrong with stricter allocation -- I don't recall the details
> but it's entirely possible that some of the fields could be 64 bits on
> some architectures, in which cases the alignment certainly should be
> 8-byte.
The alignment attribute tells the compiler that whatever the structure
is, it will always be aligned to a 4-byte boundary. And because
hardware representation very rarely express 4-byte values not aligned to
a 4-byte address boundary, the combination of both attributes is
actually the best way to have no padding in a structure but still access
it with word-sized accesses when possible. Of course that assumes that
the non padded structure does have a sequence of members which sizes
match their alignments or a 4-byte boundary, whichever is the smallest.
> > If the answer is yes in both cases, then having packed,aligned(4) is not
> > a frivolity but rather a correctness issue.
>
> Why so? Current systems work just fine without it.
Doesn't mean that because it used to work that it is strictly correct.
Wouldn't be the first time that a GCC upgrade broke the kernel because
the kernel wasn't describing things properly enough.
> > We can of course provide a
> > define in include/linux/compiler-gcc.hto hide the ugliness of it
> > somewhat:
> >
> > #define __packed_32 __attribute__((packed,aligned(4)))
> >
> > I suspect that the vast majority of the __packed uses in the kernel
> > would be better with this __packed_32 instead, the actual need and
> > intent would be more clearly expressed, and the generated code in the
> > presence of those GCC changes would then be way more efficient and still
> > correct.
>
> What if the intent is that the structure should be 4-byte aligned on
> 32-bit systems and 8-byte aligned on 64-bit systems? The compiler
> already does this sort of thing automatically, why mess with it?
Above you say that the structure must not contain padding, and now you
say that you want different alignment depending on whether or not the
architecture is 32 or 64 bits?
/me confused now.
Nicolas
On Sun, 19 Jun 2011, Arnd Bergmann wrote:
> On Sunday 19 June 2011 21:00:01 Alan Stern wrote:
> > On Sun, 19 Jun 2011, Nicolas Pitre wrote:
> > > On Thu, 16 Jun 2011, Arnd Bergmann wrote:
> > > > On Thursday 16 June 2011 22:10:53 Alexander Holler wrote:
> > > > At least I would be happier without the patch. I'm trying to convince
> > > > people to not use these attributes unless required because too much
> > > > harm is done when they are used without understanding the full
> > > > consequences. I also recommend using __packed as localized as possible,
> > > > i.e. set it for the members that need it, not the entire struct.
> > > >
> > > > I agree that your patch is harmless, it's just the opposite of
> > > > a cleanup in my opinion.
> > >
> > > The question is: does the structure really has to be packed?
> >
> > What do you mean? The structure really does need to be allocated
> > without padding between the fields; is that the same thing? So do a
> > bunch of other structures that currently have no annotations at all.
>
> I guess the issue is that some ABIs actually require a minimum alignment,
> like the old ARM ABI that you can still use to build the kernel.
>
> If a structure is not a multiple of four bytes in size, that ABI
> will add padding at the end, e.g. in
>
> struct s {
> char c[2];
> };
>
> struct t {
> struct s t1;
> unsigned short t2[3];
> };
>
> On most architectures, struct s will be two bytes in size and one byte
> aligned, while struct t is eight bytes and two byte aligned.
>
> On ARM oABI, struct s ends up with four byte size and alignment while
> struct t is twelve bytes long. All this is ok for regular structures,
> but not when they are used to describe memory layout of hardware
> registers on on-wire packets.
Agreed. Is that the case with EHCI though? In your example, you'd have
to mark that structure as packed,aligned(2).
> > > If it does, then the follow-up question is: is a packing on word
> > > boundaries sufficient?
> >
> > > If the answer is yes in both cases, then having packed,aligned(4) is not
> > > a frivolity but rather a correctness issue.
> >
> > Why so? Current systems work just fine without it.
>
> I think Nicolas got it backwards here, adding both packed and
> aligned(4) would make a structure like the one above consistently
> incorrect when used to describe a tightly packed hardware structure.
I didn't look at the details, but your example above requires
aligned(2). That tells the compiler that it may use instructions to
access the data (or hardware) that are up to 2-byte wide (on ARM that
means STRh/LDRH) for the data of that width instead of the byte per byte
loads.
>
> In this case, we would have to do
>
> struct s {
> char c[2];
> } __packed;
>
> struct t {
> struct s t1;
> unsigned short t2[3] __aligned(2);
> } __packed;
>
> To tell the compiler that t2 is indeed aligned, while struct t
> is packed to include no padding around t.
... and give the aligned(2) attribute to struct t so those shorts are
accessed with a 16-bit wide load/store not byte loads/stores.
Nicolas
On Sun, 19 Jun 2011, Nicolas Pitre wrote:
> > > The question is: does the structure really has to be packed?
> >
> > What do you mean? The structure really does need to be allocated
> > without padding between the fields; is that the same thing? So do a
> > bunch of other structures that currently have no annotations at all.
>
> Yes, that's the same thing. The packed attribute tells the compiler
> that you don't want it to insert padding in it as it sees fit.
I thought the packed attribute does more than that. For example, on
some architectures doesn't it also force the compiler to use
byte-oriented instructions for accessing the structure's fields?
> > > If the answer is yes in both cases, then having packed,aligned(4) is not
> > > a frivolity but rather a correctness issue.
> >
> > Why so? Current systems work just fine without it.
>
> Doesn't mean that because it used to work that it is strictly correct.
> Wouldn't be the first time that a GCC upgrade broke the kernel because
> the kernel wasn't describing things properly enough.
It seems most unlikely that a gcc upgrade would cause unnecessary
padding to be added between a bunch of fields, all of the same size and
alignment.
> > What if the intent is that the structure should be 4-byte aligned on
> > 32-bit systems and 8-byte aligned on 64-bit systems? The compiler
> > already does this sort of thing automatically, why mess with it?
>
> Above you say that the structure must not contain padding, and now you
> say that you want different alignment depending on whether or not the
> architecture is 32 or 64 bits?
>
> /me confused now.
I looked at the structures in question; they contain nothing but u32
values and arrays of u32's, except for an array of u8's at the end of
one of the structures. So this question doesn't arise for them.
On the other hand, one of the other structures you haven't been
considering looks like this (with a bunch of uninteresting #define
lines omitted):
struct ehci_qtd {
/* first part defined by EHCI spec */
__hc32 hw_next; /* see EHCI 3.5.1 */
__hc32 hw_alt_next; /* see EHCI 3.5.2 */
__hc32 hw_token; /* see EHCI 3.5.3 */
__hc32 hw_buf [5]; /* see EHCI 3.5.4 */
__hc32 hw_buf_hi [5]; /* Appendix B */
/* the rest is HCD-private */
dma_addr_t qtd_dma; /* qtd address */
struct list_head qtd_list; /* sw qtd list */
struct urb *urb; /* qtd's urb */
size_t length; /* length of buffer */
} __attribute__ ((aligned (32)));
(__hc32 is an unsigned 32-bit type which can be either big-endian or
little-endian, depending on the device hardware.)
Only the first 5 fields need to be allocated without padding; the last
4 can be laid out arbitrarily because they do not correspond to
anything in the hardware. Once again, I do not think the ((packed))
attribute is needed here -- in fact, we probably want to avoid it
because dma_addr_t can be 64 bits even on 32-bit architectures.
Alan Stern
On Mon, 20 Jun 2011, Alan Stern wrote:
> On Sun, 19 Jun 2011, Nicolas Pitre wrote:
>
> > > > The question is: does the structure really has to be packed?
> > >
> > > What do you mean? The structure really does need to be allocated
> > > without padding between the fields; is that the same thing? So do a
> > > bunch of other structures that currently have no annotations at all.
> >
> > Yes, that's the same thing. The packed attribute tells the compiler
> > that you don't want it to insert padding in it as it sees fit.
>
> I thought the packed attribute does more than that. For example, on
> some architectures doesn't it also force the compiler to use
> byte-oriented instructions for accessing the structure's fields?
Yes, but that's a consequence of not being able to access those fields
in their naturally aligned position anymore. Hence the addition of the
align attribute to tell the compiler that we know that the structure is
still aligned to a certain degree letting the compiler to avoid
byte-oriented instructions when possible.
> > > > If the answer is yes in both cases, then having packed,aligned(4) is not
> > > > a frivolity but rather a correctness issue.
> > >
> > > Why so? Current systems work just fine without it.
> >
> > Doesn't mean that because it used to work that it is strictly correct.
> > Wouldn't be the first time that a GCC upgrade broke the kernel because
> > the kernel wasn't describing things properly enough.
>
> It seems most unlikely that a gcc upgrade would cause unnecessary
> padding to be added between a bunch of fields, all of the same size and
> alignment.
It is not the padding, but rather the decision to use or not to use
byte-oriented instructions in the abscence of explicit alignment
indication which appears to have changed with a recent GCC, which is the
source of this thread.
> > > What if the intent is that the structure should be 4-byte aligned on
> > > 32-bit systems and 8-byte aligned on 64-bit systems? The compiler
> > > already does this sort of thing automatically, why mess with it?
> >
> > Above you say that the structure must not contain padding, and now you
> > say that you want different alignment depending on whether or not the
> > architecture is 32 or 64 bits?
> >
> > /me confused now.
>
> I looked at the structures in question; they contain nothing but u32
> values and arrays of u32's, except for an array of u8's at the end of
> one of the structures. So this question doesn't arise for them.
>
> On the other hand, one of the other structures you haven't been
> considering looks like this (with a bunch of uninteresting #define
> lines omitted):
>
> struct ehci_qtd {
> /* first part defined by EHCI spec */
> __hc32 hw_next; /* see EHCI 3.5.1 */
> __hc32 hw_alt_next; /* see EHCI 3.5.2 */
> __hc32 hw_token; /* see EHCI 3.5.3 */
> __hc32 hw_buf [5]; /* see EHCI 3.5.4 */
> __hc32 hw_buf_hi [5]; /* Appendix B */
>
> /* the rest is HCD-private */
> dma_addr_t qtd_dma; /* qtd address */
> struct list_head qtd_list; /* sw qtd list */
> struct urb *urb; /* qtd's urb */
> size_t length; /* length of buffer */
> } __attribute__ ((aligned (32)));
>
> (__hc32 is an unsigned 32-bit type which can be either big-endian or
> little-endian, depending on the device hardware.)
>
> Only the first 5 fields need to be allocated without padding; the last
> 4 can be laid out arbitrarily because they do not correspond to
> anything in the hardware. Once again, I do not think the ((packed))
> attribute is needed here -- in fact, we probably want to avoid it
> because dma_addr_t can be 64 bits even on 32-bit architectures.
Indeed, nothing indicates that any packed attribute is required here.
Nicolas
On Monday 20 June 2011, Alan Stern wrote:
> On Sun, 19 Jun 2011, Nicolas Pitre wrote:
>
> > > > The question is: does the structure really has to be packed?
> > >
> > > What do you mean? The structure really does need to be allocated
> > > without padding between the fields; is that the same thing? So do a
> > > bunch of other structures that currently have no annotations at all.
> >
> > Yes, that's the same thing. The packed attribute tells the compiler
> > that you don't want it to insert padding in it as it sees fit.
>
> I thought the packed attribute does more than that. For example, on
> some architectures doesn't it also force the compiler to use
> byte-oriented instructions for accessing the structure's fields?
Correct, and ARM is one of those. Giving both packed and aligned(4)
will make gcc use 32-bit accesses again-
> > > > If the answer is yes in both cases, then having packed,aligned(4) is not
> > > > a frivolity but rather a correctness issue.
> > >
> > > Why so? Current systems work just fine without it.
> >
> > Doesn't mean that because it used to work that it is strictly correct.
> > Wouldn't be the first time that a GCC upgrade broke the kernel because
> > the kernel wasn't describing things properly enough.
>
> It seems most unlikely that a gcc upgrade would cause unnecessary
> padding to be added between a bunch of fields, all of the same size and
> alignment.
I agree. The issue is mostly between EABI and oABI compilers behaving
differently on ARM, as well as differences between architectures.
> On the other hand, one of the other structures you haven't been
> considering looks like this (with a bunch of uninteresting #define
> lines omitted):
>
> struct ehci_qtd {
> /* first part defined by EHCI spec */
> __hc32 hw_next; /* see EHCI 3.5.1 */
> __hc32 hw_alt_next; /* see EHCI 3.5.2 */
> __hc32 hw_token; /* see EHCI 3.5.3 */
> __hc32 hw_buf [5]; /* see EHCI 3.5.4 */
> __hc32 hw_buf_hi [5]; /* Appendix B */
>
> /* the rest is HCD-private */
> dma_addr_t qtd_dma; /* qtd address */
> struct list_head qtd_list; /* sw qtd list */
> struct urb *urb; /* qtd's urb */
> size_t length; /* length of buffer */
> } __attribute__ ((aligned (32)));
>
> (__hc32 is an unsigned 32-bit type which can be either big-endian or
> little-endian, depending on the device hardware.)
>
> Only the first 5 fields need to be allocated without padding; the last
> 4 can be laid out arbitrarily because they do not correspond to
> anything in the hardware. Once again, I do not think the ((packed))
> attribute is needed here -- in fact, we probably want to avoid it
> because dma_addr_t can be 64 bits even on 32-bit architectures.
Agreed as well. The packing only ever matters for data structures
interpreted outside of the kernel -- user space, hardware or
network. The first five members in this structure seem to be
accessed by hardware, but they are all aligned correctly. The
other members are only used inside of the kernel and that has to
be built using only one ABI. goto no_problem;
Arnd
On Mon, 20 Jun 2011, Nicolas Pitre wrote:
> On Mon, 20 Jun 2011, Alan Stern wrote:
>
> > On Sun, 19 Jun 2011, Nicolas Pitre wrote:
> >
> > > > > The question is: does the structure really has to be packed?
> > > >
> > > > What do you mean? The structure really does need to be allocated
> > > > without padding between the fields; is that the same thing? So do a
> > > > bunch of other structures that currently have no annotations at all.
> > >
> > > Yes, that's the same thing. The packed attribute tells the compiler
> > > that you don't want it to insert padding in it as it sees fit.
> >
> > I thought the packed attribute does more than that. For example, on
> > some architectures doesn't it also force the compiler to use
> > byte-oriented instructions for accessing the structure's fields?
>
> Yes, but that's a consequence of not being able to access those fields
> in their naturally aligned position anymore. Hence the addition of the
> align attribute to tell the compiler that we know that the structure is
> still aligned to a certain degree letting the compiler to avoid
> byte-oriented instructions when possible.
Not exactly. As far as I can tell, the ((packed)) attribute caused the
compiler to change the structure's alignment from its natural value to
1. That's why the fields weren't in their naturally aligned positions
and why removing ((packed)) fixed the problem.
> > > > > If the answer is yes in both cases, then having packed,aligned(4) is not
> > > > > a frivolity but rather a correctness issue.
> > > >
> > > > Why so? Current systems work just fine without it.
> > >
> > > Doesn't mean that because it used to work that it is strictly correct.
> > > Wouldn't be the first time that a GCC upgrade broke the kernel because
> > > the kernel wasn't describing things properly enough.
> >
> > It seems most unlikely that a gcc upgrade would cause unnecessary
> > padding to be added between a bunch of fields, all of the same size and
> > alignment.
>
> It is not the padding, but rather the decision to use or not to use
> byte-oriented instructions in the abscence of explicit alignment
> indication which appears to have changed with a recent GCC, which is the
> source of this thread.
I thought the source of the thread had nothing to do with any recent
changes to gcc. Maybe I was wrong. In any case, the issue was not the
lack of an alignment indication but rather the unnecessary presence of
a ((packed)) attribute causing the compiler to forget about the natural
alignment.
To put it another way, the problem was caused by having ((packed))
where it wasn't needed. You want to fix the immediate fallout of the
problem by adding an alignment attribute, instead of fixing the problem
itself by removing the underlying cause.
> > On the other hand, one of the other structures you haven't been
> > considering looks like this (with a bunch of uninteresting #define
> > lines omitted):
> >
> > struct ehci_qtd {
> > /* first part defined by EHCI spec */
> > __hc32 hw_next; /* see EHCI 3.5.1 */
> > __hc32 hw_alt_next; /* see EHCI 3.5.2 */
> > __hc32 hw_token; /* see EHCI 3.5.3 */
> > __hc32 hw_buf [5]; /* see EHCI 3.5.4 */
> > __hc32 hw_buf_hi [5]; /* Appendix B */
> >
> > /* the rest is HCD-private */
> > dma_addr_t qtd_dma; /* qtd address */
> > struct list_head qtd_list; /* sw qtd list */
> > struct urb *urb; /* qtd's urb */
> > size_t length; /* length of buffer */
> > } __attribute__ ((aligned (32)));
> >
> > (__hc32 is an unsigned 32-bit type which can be either big-endian or
> > little-endian, depending on the device hardware.)
> >
> > Only the first 5 fields need to be allocated without padding; the last
> > 4 can be laid out arbitrarily because they do not correspond to
> > anything in the hardware. Once again, I do not think the ((packed))
> > attribute is needed here -- in fact, we probably want to avoid it
> > because dma_addr_t can be 64 bits even on 32-bit architectures.
>
> Indeed, nothing indicates that any packed attribute is required here.
Likewise, nothing indicates any packed attribute is required for the
structures in include/linux/usb/ehci_def.h.
Alan Stern
On Monday 20 June 2011, Alan Stern wrote:
> I thought the source of the thread had nothing to do with any recent
> changes to gcc. Maybe I was wrong. In any case, the issue was not the
> lack of an alignment indication but rather the unnecessary presence of
> a ((packed)) attribute causing the compiler to forget about the natural
> alignment.
>
> To put it another way, the problem was caused by having ((packed))
> where it wasn't needed. You want to fix the immediate fallout of the
> problem by adding an alignment attribute, instead of fixing the problem
> itself by removing the underlying cause.
A recent change in gcc changed the default behaviour when compiling the
ehci driver on ARM, but the behaviour was already nondeterministic
because the definition of the readl/writel macros on ARM relies on
unspecified behaviour (cast to pointer with larger aligment).
We are also going to change the ARM implementation to always do 32 bit
accesses in readl/writel, but the patch that went into the ehci driver
was correct nonetheless.
Arnd
On Mon, 20 Jun 2011, Alan Stern wrote:
> On Mon, 20 Jun 2011, Nicolas Pitre wrote:
>
> > On Mon, 20 Jun 2011, Alan Stern wrote:
> >
> > > On Sun, 19 Jun 2011, Nicolas Pitre wrote:
> > >
> > > > > > The question is: does the structure really has to be packed?
> > > > >
> > > > > What do you mean? The structure really does need to be allocated
> > > > > without padding between the fields; is that the same thing? So do a
> > > > > bunch of other structures that currently have no annotations at all.
> > > >
> > > > Yes, that's the same thing. The packed attribute tells the compiler
> > > > that you don't want it to insert padding in it as it sees fit.
> > >
> > > I thought the packed attribute does more than that. For example, on
> > > some architectures doesn't it also force the compiler to use
> > > byte-oriented instructions for accessing the structure's fields?
> >
> > Yes, but that's a consequence of not being able to access those fields
> > in their naturally aligned position anymore. Hence the addition of the
> > align attribute to tell the compiler that we know that the structure is
> > still aligned to a certain degree letting the compiler to avoid
> > byte-oriented instructions when possible.
>
> Not exactly. As far as I can tell, the ((packed)) attribute caused the
> compiler to change the structure's alignment from its natural value to
> 1. That's why the fields weren't in their naturally aligned positions
> and why removing ((packed)) fixed the problem.
Are we talking past each other?
Remember that I was the one asking if the align attribute was needed in
the first place. If it is not then by all means please get rid of it!
But if it _is_ needed, then the generated code can be much better if the
packed attribute is _also_ followed by the align attribute to
increase it from 1.
Nicolas
On Mon, 20 Jun 2011, Nicolas Pitre wrote:
> Are we talking past each other?
>
> Remember that I was the one asking if the align attribute was needed in
> the first place. If it is not then by all means please get rid of it!
>
> But if it _is_ needed, then the generated code can be much better if the
> packed attribute is _also_ followed by the align attribute to
> increase it from 1.
According to Arnd, any remaining possible issues will be addressed by
changing the implementation of readl/writel on ARM. It doesn't look as
though the ehci files need anything else done.
As far as I can tell, the other structures in ehci.h have
((aligned(32)) simply in order to save space, since there can be large
numbers of these structures allocated. That doesn't apply to the
structures in ehci_def.h; there will only be one of each.
Alan Stern
Am 20.06.2011 19:10, schrieb Nicolas Pitre:
> On Mon, 20 Jun 2011, Alan Stern wrote:
>
>> On Mon, 20 Jun 2011, Nicolas Pitre wrote:
>>
>>> On Mon, 20 Jun 2011, Alan Stern wrote:
>>>
>>>> On Sun, 19 Jun 2011, Nicolas Pitre wrote:
>>>>
>>>>>>> The question is: does the structure really has to be packed?
>>>>>>
>>>>>> What do you mean? The structure really does need to be allocated
>>>>>> without padding between the fields; is that the same thing? So do a
>>>>>> bunch of other structures that currently have no annotations at all.
>>>>>
>>>>> Yes, that's the same thing. The packed attribute tells the compiler
>>>>> that you don't want it to insert padding in it as it sees fit.
>>>>
>>>> I thought the packed attribute does more than that. For example, on
>>>> some architectures doesn't it also force the compiler to use
>>>> byte-oriented instructions for accessing the structure's fields?
>>>
>>> Yes, but that's a consequence of not being able to access those fields
>>> in their naturally aligned position anymore. Hence the addition of the
>>> align attribute to tell the compiler that we know that the structure is
>>> still aligned to a certain degree letting the compiler to avoid
>>> byte-oriented instructions when possible.
>>
>> Not exactly. As far as I can tell, the ((packed)) attribute caused the
>> compiler to change the structure's alignment from its natural value to
>> 1. That's why the fields weren't in their naturally aligned positions
>> and why removing ((packed)) fixed the problem.
>
> Are we talking past each other?
>
> Remember that I was the one asking if the align attribute was needed in
> the first place. If it is not then by all means please get rid of it!
>
> But if it _is_ needed, then the generated code can be much better if the
> packed attribute is _also_ followed by the align attribute to
> increase it from 1.
That reminds me of some time where I had fun asking someone with deeper
ppc- and xlC-knowledge than I, if he is sure that assignments, are
atomic. He always said yes. Got something like a running gag. ;)
I see it that way: packed is needed to be sure that at least for struct
ehci_regs there are no padding bytes inbetween the members. It might
work without, but that depends on the compiler (-version, architecture,
whatever).
That packed without an additional aligned() caused errors on ARM with
gcc 4.6 is another problem which got (currently) fixed by removing packed.
But this introduces imho doubts and uncertainty about if padding bytes
could be between the members, therefore I would prefer to use packed
with aligned instead of removing the packed.
Regards,
Alexander
On Mon, 20 Jun 2011, Alexander Holler wrote:
> I see it that way: packed is needed to be sure that at least for struct
> ehci_regs there are no padding bytes inbetween the members.
But is it _really_ needed?
> It might
> work without, but that depends on the compiler (-version, architecture,
> whatever).
Have there _ever_ been _any_ combinations of compiler, version,
architecture, whatever, that had unwanted padding bytes in this
structure?
Alan Stern
Am 20.06.2011 20:39, schrieb Alan Stern:
> On Mon, 20 Jun 2011, Alexander Holler wrote:
>
>> I see it that way: packed is needed to be sure that at least for struct
>> ehci_regs there are no padding bytes inbetween the members.
>
> But is it _really_ needed?
>
>> It might
>> work without, but that depends on the compiler (-version, architecture,
>> whatever).
>
> Have there _ever_ been _any_ combinations of compiler, version,
> architecture, whatever, that had unwanted padding bytes in this
> structure?
I don't know. But if there would be no doubts, this discussion would not
happen and I assume there never would have been an attribute packed there.
Regards,
Alexander
On Mon, Jun 20, 2011 at 01:35:35PM -0400, Alan Stern wrote:
> According to Arnd, any remaining possible issues will be addressed by
> changing the implementation of readl/writel on ARM. It doesn't look as
> though the ehci files need anything else done.
I'm not about to change their implementation because they've proven
themselves over the last 10 years to be perfectly fine, and changing
them has a habbit of causing GCC to play less optimally than it should
do.
I've seen drivers where GCC reloads the base address from the driver
private data structure each time a register access is performed, rather
than caching the base address in a register. I've seen it issuing
separate add instructions and using a zero pre-index load/store. The
existing way is the only way I've found to get GCC to come anywhere
close to producing "optimal" code for the IO accessors.
If it is the case that these structures do not require packing to get
their desired layout, then they don't require packing, and the packed
attribute should be dropped.
On Mon, 20 Jun 2011, Alexander Holler wrote:
> Am 20.06.2011 20:39, schrieb Alan Stern:
> > On Mon, 20 Jun 2011, Alexander Holler wrote:
> >
> >> I see it that way: packed is needed to be sure that at least for struct
> >> ehci_regs there are no padding bytes inbetween the members.
> >
> > But is it _really_ needed?
> >
> >> It might
> >> work without, but that depends on the compiler (-version, architecture,
> >> whatever).
> >
> > Have there _ever_ been _any_ combinations of compiler, version,
> > architecture, whatever, that had unwanted padding bytes in this
> > structure?
>
> I don't know. But if there would be no doubts, this discussion would not
> happen and I assume there never would have been an attribute packed there.
Don't be so sure. That is very old code; the attribute could easily
have been present for no good reason.
Alan Stern
On Mon, Jun 20, 2011 at 06:58:45PM +0200, Arnd Bergmann wrote:
> A recent change in gcc changed the default behaviour when compiling the
> ehci driver on ARM, but the behaviour was already nondeterministic
> because the definition of the readl/writel macros on ARM relies on
> unspecified behaviour (cast to pointer with larger aligment).
It's unspecified behaviour period. If you pass a pointer to readl/writel
which is not word aligned, what you get back is anyones guess.
Unaligned load/stores to devices are 'unpredictable' on ARM, and are
probably unpredictable on any other sane arch (unless you can predict
how the load is going to be issued on the bus, how the peripheral is
going to respond, and how the bus activity is going to be assembled
into a word.)
So, to issue a readl against an u16 pointer is unpredictable not only
due to the C language, but also from the sanity point of view.
> We are also going to change the ARM implementation to always do 32 bit
> accesses in readl/writel, but the patch that went into the ehci driver
> was correct nonetheless.
Not without someone doing a comparitively large amount of work to analyze
the effect of any change there and make sure that it doesn't have a
negative impact to drivers.
On Mon, 20 Jun 2011, Alan Stern wrote:
> On Mon, 20 Jun 2011, Nicolas Pitre wrote:
>
> > Are we talking past each other?
> >
> > Remember that I was the one asking if the align attribute was needed in
> > the first place. If it is not then by all means please get rid of it!
> >
> > But if it _is_ needed, then the generated code can be much better if the
> > packed attribute is _also_ followed by the align attribute to
> > increase it from 1.
>
> According to Arnd, any remaining possible issues will be addressed by
> changing the implementation of readl/writel on ARM. It doesn't look as
> though the ehci files need anything else done.
Any usage of __packed is potentially making the code less optimal than
it could, depending on the actual layout of the structure where this is
applied, because outside of the IO accessor context, the compiler would
use less than optimal instructions when accessing the structure members.
If what you have is:
struct foo {
u8 c;
u32 d;
u8 e;
};
If you need that structure to be packed then so be it and nothing else
can be done about it.
However if you have:
struct foo {
u32 c;
u64 d;
u32 e;
};
Here the d member is not naturally aligned. On most architectures,
including ARM with the ABI currently in use, the compiler would insert a
32-bit padding between c and d. If you must prevent that from happening
then you'll mark this struct with __packed. However that will also mark
it as having an alignment of 1, meaning that all accesses to this
structure will be done byte by byte and the resulting values
reconstructed with shifts and ORs.
Whar ARnd is talking about is _only_ about the IO accessor on ARM which
behavior changed with newer GCC versions. Changing the IO accessor
implementation will fix the byte sized access issue to the hardware, but
the rest of the code will still suck even if it will work correctly.
By adding the aligned(4) attribute here, you're telling the compiler
that despite the packing attribute, it may assume that the structure is
still aligned on a 32-bit boundary (which is normally true except if you
cast a random pointer to this struct of course) and therefore it can
still use 32-bit sized accesses, and the u64 member will be correctly
accessed using a pair of 32-bit accesses instead of 8 byte sized
accesses.
So this is a matter of being intelligent with those attributes and not
stamping them freely just because a structure might be mapped to some
hardware representation. In most cases, the packed attribute is just
unneeded.
> As far as I can tell, the other structures in ehci.h have
> ((aligned(32)) simply in order to save space, since there can be large
> numbers of these structures allocated.
How can increasing the alignment to 32 bytes save space?
Usually a greater alignment is used to ensure proper mapping to CPU
cache line boundaries, not to save space.
Nicolas
On Mon, 20 Jun 2011, Russell King - ARM Linux wrote:
> On Mon, Jun 20, 2011 at 06:58:45PM +0200, Arnd Bergmann wrote:
> > A recent change in gcc changed the default behaviour when compiling the
> > ehci driver on ARM, but the behaviour was already nondeterministic
> > because the definition of the readl/writel macros on ARM relies on
> > unspecified behaviour (cast to pointer with larger aligment).
>
> It's unspecified behaviour period. If you pass a pointer to readl/writel
> which is not word aligned, what you get back is anyones guess.
The pointer _is_ aligned. The problem is that it comes from the address
of a structure member, which structure is marked __packed.
Older GCC would see the cast to unsigned int applied to that pointer
within the ARM's readl()/writel() implementation and ignore that the
source of the address is marked __packed, which __packed implies an
alignment of 1. REcent GCC versions would honnor the alignment of 1 and
perform the unsigned int * access using byte sized loads/stores.
Nicolas
On Mon, 20 Jun 2011, Russell King - ARM Linux wrote:
> On Mon, Jun 20, 2011 at 06:58:45PM +0200, Arnd Bergmann wrote:
> > We are also going to change the ARM implementation to always do 32 bit
> > accesses in readl/writel, but the patch that went into the ehci driver
> > was correct nonetheless.
>
> Not without someone doing a comparitively large amount of work to analyze
> the effect of any change there and make sure that it doesn't have a
> negative impact to drivers.
This thread prompted me to investigate a bit. I have vague memories for
the reasons why we decided to use plain C for the IO accessors as the
inline assembly version didn't produce nearly the same code quality. It
seems that GCC improved quite a bit there, and from a quick
investigation, it looks like comparable code is being generated with the
C and the inline asm versions with a recent enough GCC. This is
certainly the case with the version causing issues with packed
structures.
Nicolas
On Mon, Jun 20, 2011 at 03:14:26PM -0400, Nicolas Pitre wrote:
> If you need that structure to be packed then so be it and nothing else
> can be done about it.
>
> However if you have:
>
> struct foo {
> u32 c;
> u64 d;
> u32 e;
> };
>
> Here the d member is not naturally aligned. On most architectures,
> including ARM with the ABI currently in use, the compiler would insert a
> 32-bit padding between c and d.
And if 'struct foo' represents a structure in device memory, the end
result is highly unpredicable whether or not you have padding or
accessors to load 'd' there. So, you would not have such a structure
describing a data structure in memory returned by ioremap().
Now, the real question is: is there any architecture which is (or may
be) supported by the Linux kernel which would add padding to:
struct foo {
u8 a;
u8 b;
u16 c;
u32 d;
u64 e;
};
?
The last gotcha here is struct size.
struct bar {
u8 a;
u8 b;
};
May be 2 on some Linux supporting architectures, or may be larger due to
tail padding. Eg, ARM OABI will add two bytes of tail padding to this.
If we assume that 'struct foo' will be laid out as we desire (iow, no
additional padding with naturally aligned elements) then the only
remaining issue is sizeof(struct), and that's a whole different ballgame.
That shouldn't be solved by packed.
On Mon, 20 Jun 2011, Alan Stern wrote:
> On Mon, 20 Jun 2011, Alexander Holler wrote:
>
> > I see it that way: packed is needed to be sure that at least for struct
> > ehci_regs there are no padding bytes inbetween the members.
>
> But is it _really_ needed?
>
> > It might
> > work without, but that depends on the compiler (-version, architecture,
> > whatever).
>
> Have there _ever_ been _any_ combinations of compiler, version,
> architecture, whatever, that had unwanted padding bytes in this
> structure?
This can be determined by simple code inspection.
If you must have struct members which are not aligned to their natural
size then you need __packed. Example:
struct foo {
u8 a;
u16 b;
u32 c;
u64 d;
};
Without __packed, there will be padding between a and b, and between c
and d. If the order of the members in this struct were reversed, then
everything would be naturally aligned and no padding between members
would be inserted.
The size of structures is normally rounded up with padding to the size
of the largest basic element it contains. Example:
struct foo {
u64 a;
u8 b;
};
Here sizeof(struct foo) would return 16, even if the actual content
occupies 9 bytes only. That's because the largest basic element is u64
i.e. 8 bytes. Normally this trailing padding is not an issue, unless
you have an array of such a struct or if it is a member of another
struct. If you want to get rid of that padding, you need to use
__packed again (which of course would make all subsequent instances of
that structure in your array completely misaligned too).
Two odd exceptions with the old ABI on ARM:
- The alignment of a 64-bit value is always 4 bytes not 8.
- The size of all structures are always rounded up to a 4-byte boundary,
irrespective of their content.
If you fall into none of the above issues, then you don't need any
__packed, period.
Nicolas
On Monday 20 June 2011 19:39:34 Alexander Holler wrote:
> That packed without an additional aligned() caused errors on ARM with
> gcc 4.6 is another problem which got (currently) fixed by removing packed.
Packed caused errors because it is *wrong*. The code as it was used undefined
behavior in the language.
> But this introduces imho doubts and uncertainty about if padding bytes
> could be between the members, therefore I would prefer to use packed
> with aligned instead of removing the packed.
Packing was never an issue here, please stop talking about it.
Arnd
On Monday 20 June 2011 20:39:02 Alan Stern wrote:
> On Mon, 20 Jun 2011, Alexander Holler wrote:
>
> > I see it that way: packed is needed to be sure that at least for struct
> > ehci_regs there are no padding bytes inbetween the members.
>
> But is it really needed?
No. When the structure is marked packed, it's broken because it relies
on undefined behavior. If it's not packed, there is no problem.
> > It might
> > work without, but that depends on the compiler (-version, architecture,
> > whatever).
>
> Have there ever been any combinations of compiler, version,
> architecture, whatever, that had unwanted padding bytes in this
> structure?
Only on compilers that are not able to build Linux kernels anyway.
Arnd
On Monday 20 June 2011 21:32:13 Russell King - ARM Linux wrote:
> > Here the d member is not naturally aligned. On most architectures,
> > including ARM with the ABI currently in use, the compiler would insert a
> > 32-bit padding between c and d.
>
> And if 'struct foo' represents a structure in device memory, the end
> result is highly unpredicable whether or not you have padding or
> accessors to load 'd' there. So, you would not have such a structure
> describing a data structure in memory returned by ioremap().
Right.
> Now, the real question is: is there any architecture which is (or may
> be) supported by the Linux kernel which would add padding to:
>
> struct foo {
> u8 a;
> u8 b;
> u16 c;
> u32 d;
> u64 e;
> };
This is the other issue, which we were facing in the scsi drivers.
If an architecture requires padding because some members require
larger than natural alignment here, the 'packed' should be applied
to that member, in order to change the alignment of that member.
Arnd
On Monday 20 June 2011 20:48:49 Russell King - ARM Linux wrote:
> On Mon, Jun 20, 2011 at 01:35:35PM -0400, Alan Stern wrote:
> > According to Arnd, any remaining possible issues will be addressed by
> > changing the implementation of readl/writel on ARM. It doesn't look as
> > though the ehci files need anything else done.
>
> I'm not about to change their implementation because they've proven
> themselves over the last 10 years to be perfectly fine, and changing
> them has a habbit of causing GCC to play less optimally than it should
> do.
Well, we do know that gcc now makes different tradeoffs, and that it's
entirely within the C99 specification when it's generating byte accesses
from __raw_readl(). The case where the pointer is __packed is just the
obvious case where it would do that, and I fully agree that the __packed
in that case is a bug, but I'm much in favor of writing code so that
we instruct the compiler to create correct code rather than giving it
the choice between correct and incorrect.
> I've seen drivers where GCC reloads the base address from the driver
> private data structure each time a register access is performed, rather
> than caching the base address in a register. I've seen it issuing
> separate add instructions and using a zero pre-index load/store. The
> existing way is the only way I've found to get GCC to come anywhere
> close to producing "optimal" code for the IO accessors.
Two points here:
* What's the olders compiler that we really need to be able to build
efficient kernels? Would you consider it if we can show that gcc-4.2
and higher produce code that is as good as the existing macros?
How about making the code gcc version dependent?
* We already need a compiler barrier in the non-_relaxed() versions of
the I/O accessors, which will force a reload of the base address
in a lot of cases, so the code is already suboptimal. Yes, we don't
have the barrier today without CONFIG_ARM_DMA_MEM_BUFFERABLE, but that
is a bug, because it lets the compiler move accesses to DMA buffers
around readl/writel.
> If it is the case that these structures do not require packing to get
> their desired layout, then they don't require packing, and the packed
> attribute should be dropped.
Yes. But are you going to audit every other use of __packed in the kernel
to check if it is used on __iomem pointers?
Arnd
On Mon, 20 Jun 2011, Arnd Bergmann wrote:
> On Monday 20 June 2011 19:39:34 Alexander Holler wrote:
> > That packed without an additional aligned() caused errors on ARM with
> > gcc 4.6 is another problem which got (currently) fixed by removing packed.
>
> Packed caused errors because it is *wrong*. The code as it was used undefined
> behavior in the language.
I wouldn't call this issue as such, but this is a Red herring.
Could you please provide a pointer to the structure definition so a
second opinion to the usefulness of __packed there could be provided?
If it is not matching any of the fairly limited cases where having
__packed is relevant then we can just confirm that it should go.
Nicolas
On Monday 20 June 2011 22:28:49 Nicolas Pitre wrote:
> On Mon, 20 Jun 2011, Arnd Bergmann wrote:
>
> > On Monday 20 June 2011 19:39:34 Alexander Holler wrote:
> > > That packed without an additional aligned() caused errors on ARM with
> > > gcc 4.6 is another problem which got (currently) fixed by removing packed.
> >
> > Packed caused errors because it is *wrong*. The code as it was used undefined
> > behavior in the language.
>
> I wouldn't call this issue as such, but this is a Red herring.
>
> Could you please provide a pointer to the structure definition so a
> second opinion to the usefulness of __packed there could be provided?
The structures in question are ehci_caps, ehci_regs and ehci_dbg_port.
The patch that remove the __packed attribute was 139540170 "USB: ehci:
remove structure packing from ehci_def".
The reason why I consider it a bug is that an access to a register
using readl/writel on the structure requires casting a pointer with
byte alignment to a pointer with word alignment, which is undefined
in C. Gcc just tries to be helpful and work around this by turning
the access into bytewise load/store instructions. In older gcc versions,
it would not do that if you happen to also case from non-volatile to
volatile pointer, but according to Uli that was not an intentional
feature of gcc but the ARM code just worked by pure coincidence.
> If it is not matching any of the fairly limited cases where having
> __packed is relevant then we can just confirm that it should go.
It's already gone.
Arnd
On Mon, 20 Jun 2011, Nicolas Pitre wrote:
> Any usage of __packed is potentially making the code less optimal than
> it could, depending on the actual layout of the structure where this is
> applied, because outside of the IO accessor context, the compiler would
> use less than optimal instructions when accessing the structure members.
>
> If what you have is:
>
> struct foo {
> u8 c;
> u32 d;
> u8 e;
> };
>
> If you need that structure to be packed then so be it and nothing else
> can be done about it.
>
> However if you have:
>
> struct foo {
> u32 c;
> u64 d;
> u32 e;
> };
>
> Here the d member is not naturally aligned. On most architectures,
> including ARM with the ABI currently in use, the compiler would insert a
> 32-bit padding between c and d. If you must prevent that from happening
> then you'll mark this struct with __packed. However that will also mark
> it as having an alignment of 1, meaning that all accesses to this
> structure will be done byte by byte and the resulting values
> reconstructed with shifts and ORs.
Agreed.
> Whar ARnd is talking about is _only_ about the IO accessor on ARM which
> behavior changed with newer GCC versions. Changing the IO accessor
> implementation will fix the byte sized access issue to the hardware, but
> the rest of the code will still suck even if it will work correctly.
>
> By adding the aligned(4) attribute here, you're telling the compiler
> that despite the packing attribute, it may assume that the structure is
> still aligned on a 32-bit boundary (which is normally true except if you
> cast a random pointer to this struct of course) and therefore it can
> still use 32-bit sized accesses, and the u64 member will be correctly
> accessed using a pair of 32-bit accesses instead of 8 byte sized
> accesses.
>
> So this is a matter of being intelligent with those attributes and not
> stamping them freely just because a structure might be mapped to some
> hardware representation. In most cases, the packed attribute is just
> unneeded.
Again, agreed. The current code does not have the packed attribute.
> > As far as I can tell, the other structures in ehci.h have
> > ((aligned(32)) simply in order to save space, since there can be large
> > numbers of these structures allocated.
>
> How can increasing the alignment to 32 bytes save space?
No, no -- the alignment is _decreased_ to 32 bits. Without the
attribute the alignment would have been 64 bits.
> Usually a greater alignment is used to ensure proper mapping to CPU
> cache line boundaries, not to save space.
Irrelevant to the point I was making.
Alan Stern
On Mon, 20 Jun 2011, Arnd Bergmann wrote:
> On Monday 20 June 2011 20:48:49 Russell King - ARM Linux wrote:
> > If it is the case that these structures do not require packing to get
> > their desired layout, then they don't require packing, and the packed
> > attribute should be dropped.
>
> Yes. But are you going to audit every other use of __packed in the kernel
> to check if it is used on __iomem pointers?
The compiler might tell us about it:
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index d66605d..10c47e8 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -49,11 +49,11 @@ extern void __raw_readsl(const void __iomem *addr, void *data, int longlen);
#define __raw_writeb(v,a) (__chk_io_ptr(a), *(volatile unsigned char __force *)(a) = (v))
#define __raw_writew(v,a) (__chk_io_ptr(a), *(volatile unsigned short __force *)(a) = (v))
-#define __raw_writel(v,a) (__chk_io_ptr(a), *(volatile unsigned int __force *)(a) = (v))
+#define __raw_writel(v,a) (__chk_io_ptr(a), BUILD_BUG_ON_ZERO(__alignof(*(int *)a) != 4), *(volatile unsigned int __force *)(a) = (v))
#define __raw_readb(a) (__chk_io_ptr(a), *(volatile unsigned char __force *)(a))
#define __raw_readw(a) (__chk_io_ptr(a), *(volatile unsigned short __force *)(a))
-#define __raw_readl(a) (__chk_io_ptr(a), *(volatile unsigned int __force *)(a))
+#define __raw_readl(a) (__chk_io_ptr(a), BUILD_BUG_ON_ZERO(__alignof(*(int *)a) != 4), *(volatile unsigned int __force *)(a))
/*
* Architecture ioremap implementation.
And similar for readh/writeh, given that your GCC version is preserving
the alignment attribute across the cast of course.
[...]
Scratch that. The alignment of a void pointer dereference is 1.
Nicolas
On Mon, Jun 20, 2011 at 10:26:37PM +0200, Arnd Bergmann wrote:
> * We already need a compiler barrier in the non-_relaxed() versions of
> the I/O accessors, which will force a reload of the base address
> in a lot of cases, so the code is already suboptimal. Yes, we don't
> have the barrier today without CONFIG_ARM_DMA_MEM_BUFFERABLE, but that
> is a bug, because it lets the compiler move accesses to DMA buffers
> around readl/writel.
You're now being obtuse there. You don't need compiler barriers to
guarantee order - that's what volatile does there.
Before you start quoting stuff about volatile, look at the
volatile-considered-harmful.txt document:
- The above-mentioned accessor functions might use volatile on
architectures where direct I/O memory access does work. Essentially,
each accessor call becomes a little critical section on its own and
ensures that the access happens as expected by the programmer.
which is what we're doing here. And because each accessor is its own
little critical section, there's no need for a compiler barrier.
> > If it is the case that these structures do not require packing to get
> > their desired layout, then they don't require packing, and the packed
> > attribute should be dropped.
>
> Yes. But are you going to audit every other use of __packed in the kernel
> to check if it is used on __iomem pointers?
As I've said, using __packed on __iomem pointers is fraught for many
reasons, and ignoring the other reasons and just concentrating on the
IO accessor problem is bad news in any case.
So yes, __packed needs to be solved _irrespective_ of the IO accessor
issue.
On Mon, 20 Jun 2011, Arnd Bergmann wrote:
> On Monday 20 June 2011 22:28:49 Nicolas Pitre wrote:
> > On Mon, 20 Jun 2011, Arnd Bergmann wrote:
> >
> > > On Monday 20 June 2011 19:39:34 Alexander Holler wrote:
> > > > That packed without an additional aligned() caused errors on ARM with
> > > > gcc 4.6 is another problem which got (currently) fixed by removing packed.
> > >
> > > Packed caused errors because it is *wrong*. The code as it was used undefined
> > > behavior in the language.
> >
> > I wouldn't call this issue as such, but this is a Red herring.
> >
> > Could you please provide a pointer to the structure definition so a
> > second opinion to the usefulness of __packed there could be provided?
>
> The structures in question are ehci_caps, ehci_regs and ehci_dbg_port.
OK. If anyone was still entertaining any doubts, those structures do
not require any packed attribute what so ever.
> The patch that remove the __packed attribute was 139540170 "USB: ehci:
> remove structure packing from ehci_def".
Too late for this, but for the record:
Reviewed-by: Nicolas Pitre <[email protected]>
Hopefully this thread still gathered enough wisdom about proper usage of
the packed and aligned attributes into the list archive.
Nicolas
On Mon, 20 Jun 2011, Nicolas Pitre wrote:
> On Mon, 20 Jun 2011, Alan Stern wrote:
>
> > On Mon, 20 Jun 2011, Alexander Holler wrote:
> >
> > > I see it that way: packed is needed to be sure that at least for struct
> > > ehci_regs there are no padding bytes inbetween the members.
> >
> > But is it _really_ needed?
> >
> > > It might
> > > work without, but that depends on the compiler (-version, architecture,
> > > whatever).
> >
> > Have there _ever_ been _any_ combinations of compiler, version,
> > architecture, whatever, that had unwanted padding bytes in this
> > structure?
>
> This can be determined by simple code inspection.
>
> If you must have struct members which are not aligned to their natural
> size then you need __packed. Example:
>
> struct foo {
> u8 a;
> u16 b;
> u32 c;
> u64 d;
> };
>
> Without __packed, there will be padding between a and b, and between c
> and d.
One byte of padding between a and b is enough. No more is needed, and
the compiler would have to be pretty stupid to add anything else.
> If the order of the members in this struct were reversed, then
> everything would be naturally aligned and no padding between members
> would be inserted.
>
> The size of structures is normally rounded up with padding to the size
> of the largest basic element it contains. Example:
>
> struct foo {
> u64 a;
> u8 b;
> };
>
> Here sizeof(struct foo) would return 16, even if the actual content
> occupies 9 bytes only. That's because the largest basic element is u64
> i.e. 8 bytes. Normally this trailing padding is not an issue, unless
> you have an array of such a struct or if it is a member of another
> struct. If you want to get rid of that padding, you need to use
> __packed again (which of course would make all subsequent instances of
> that structure in your array completely misaligned too).
>
> Two odd exceptions with the old ABI on ARM:
>
> - The alignment of a 64-bit value is always 4 bytes not 8.
>
> - The size of all structures are always rounded up to a 4-byte boundary,
> irrespective of their content.
>
> If you fall into none of the above issues, then you don't need any
> __packed, period.
We don't fall into any of these cases, and therefore as you say, we
don't need packed. Arnd and I have both explained this. So why do you
keep arguing that we do need it?
Alan Stern
On Mon, 20 Jun 2011, Arnd Bergmann wrote:
> On Monday 20 June 2011 20:39:02 Alan Stern wrote:
> > On Mon, 20 Jun 2011, Alexander Holler wrote:
> >
> > > I see it that way: packed is needed to be sure that at least for struct
> > > ehci_regs there are no padding bytes inbetween the members.
> >
> > But is it really needed?
>
> No. When the structure is marked packed, it's broken because it relies
> on undefined behavior. If it's not packed, there is no problem.
>
> > > It might
> > > work without, but that depends on the compiler (-version, architecture,
> > > whatever).
> >
> > Have there ever been any combinations of compiler, version,
> > architecture, whatever, that had unwanted padding bytes in this
> > structure?
>
> Only on compilers that are not able to build Linux kernels anyway.
Just as I thought. There's no reason to accept the proposed patch;
we're fine the way we are now.
Alan Stern
On Monday 20 June 2011 22:55:59 Russell King - ARM Linux wrote:
> On Mon, Jun 20, 2011 at 10:26:37PM +0200, Arnd Bergmann wrote:
> > * We already need a compiler barrier in the non-_relaxed() versions of
> > the I/O accessors, which will force a reload of the base address
> > in a lot of cases, so the code is already suboptimal. Yes, we don't
> > have the barrier today without CONFIG_ARM_DMA_MEM_BUFFERABLE, but that
> > is a bug, because it lets the compiler move accesses to DMA buffers
> > around readl/writel.
>
> You're now being obtuse there. You don't need compiler barriers to
> guarantee order - that's what volatile does there.
>
A simple counterexample:
int f(volatile unsigned long *v)
{
unsigned long a[2], ret;
a[0] = 1; /* initialize our DMA buffer */
a[1] = 2;
*v = (unsigned long)a; /* pass the address to the device, start DMA */
ret = *v; /* flush DMA by reading from mmio */
return ret + a[1]; /* return accumulated status from readl and from modified
DMA buffer */
}
arm-linux-gnueabi-gcc -Wall -O2 test.c -S
Without a barrier, the stores into the DMA buffer before the start are
lost, as is the load from the modified DMA buffer:
sub sp, sp, #8
add r3, sp, #0
str r3, [r0, #0]
ldr r0, [r0, #0]
adds r0, r0, #2
add sp, sp, #8
bx lr
Adding a memory clobber to the volatile dereference turns this into the
expected output:
sub sp, sp, #8
movs r3, #2
movs r2, #1
stmia sp, {r2, r3}
add r3, sp, #0
str r3, [r0, #0]
ldr r0, [r0, #0]
ldr r3, [sp, #4]
adds r0, r0, r3
add sp, sp, #8
bx lr
Now, the dma buffer is written before the volatile access, and read out
again afterwards.
Arnd
On Mon, 20 Jun 2011, Arnd Bergmann wrote:
> On Monday 20 June 2011 22:55:59 Russell King - ARM Linux wrote:
> > On Mon, Jun 20, 2011 at 10:26:37PM +0200, Arnd Bergmann wrote:
> > > * We already need a compiler barrier in the non-_relaxed() versions of
> > > the I/O accessors, which will force a reload of the base address
> > > in a lot of cases, so the code is already suboptimal. Yes, we don't
> > > have the barrier today without CONFIG_ARM_DMA_MEM_BUFFERABLE, but that
> > > is a bug, because it lets the compiler move accesses to DMA buffers
> > > around readl/writel.
> >
> > You're now being obtuse there. You don't need compiler barriers to
> > guarantee order - that's what volatile does there.
> >
>
> A simple counterexample:
>
>
> int f(volatile unsigned long *v)
> {
> unsigned long a[2], ret;
> a[0] = 1; /* initialize our DMA buffer */
> a[1] = 2;
> *v = (unsigned long)a; /* pass the address to the device, start DMA */
> ret = *v; /* flush DMA by reading from mmio */
> return ret + a[1]; /* return accumulated status from readl and from modified
> DMA buffer */
> }
>
> arm-linux-gnueabi-gcc -Wall -O2 test.c -S
>
> Without a barrier, the stores into the DMA buffer before the start are
> lost, as is the load from the modified DMA buffer:
>
> sub sp, sp, #8
> add r3, sp, #0
> str r3, [r0, #0]
> ldr r0, [r0, #0]
> adds r0, r0, #2
> add sp, sp, #8
> bx lr
>
> Adding a memory clobber to the volatile dereference turns this into the
> expected output:
>
> sub sp, sp, #8
> movs r3, #2
> movs r2, #1
> stmia sp, {r2, r3}
> add r3, sp, #0
> str r3, [r0, #0]
> ldr r0, [r0, #0]
> ldr r3, [sp, #4]
> adds r0, r0, r3
> add sp, sp, #8
> bx lr
>
> Now, the dma buffer is written before the volatile access, and read out
> again afterwards.
This example is flawed. The DMA API documentation already forbids DMA to
the stack because of cache line sharing issues. If you declare your
buffer outside of the function body, the compiler can't optimize away
the buffer store anymore, and this example works as expected without any
memory clobber.
Nicolas
On Mon, 20 Jun 2011, Alan Stern wrote:
> On Mon, 20 Jun 2011, Nicolas Pitre wrote:
>
> > On Mon, 20 Jun 2011, Alan Stern wrote:
> >
> > > On Mon, 20 Jun 2011, Alexander Holler wrote:
> > >
> > > > I see it that way: packed is needed to be sure that at least for struct
> > > > ehci_regs there are no padding bytes inbetween the members.
> > >
> > > But is it _really_ needed?
> > >
> > > > It might
> > > > work without, but that depends on the compiler (-version, architecture,
> > > > whatever).
> > >
> > > Have there _ever_ been _any_ combinations of compiler, version,
> > > architecture, whatever, that had unwanted padding bytes in this
> > > structure?
> >
> > This can be determined by simple code inspection.
> >
> > If you must have struct members which are not aligned to their natural
> > size then you need __packed. Example:
> >
> > struct foo {
> > u8 a;
> > u16 b;
> > u32 c;
> > u64 d;
> > };
> >
> > Without __packed, there will be padding between a and b, and between c
> > and d.
>
> One byte of padding between a and b is enough. No more is needed, and
> the compiler would have to be pretty stupid to add anything else.
Obviously, my mistake. I meant to make c a u16 too but failed to
correct the example before posting.
> > If the order of the members in this struct were reversed, then
> > everything would be naturally aligned and no padding between members
> > would be inserted.
> >
> > The size of structures is normally rounded up with padding to the size
> > of the largest basic element it contains. Example:
> >
> > struct foo {
> > u64 a;
> > u8 b;
> > };
> >
> > Here sizeof(struct foo) would return 16, even if the actual content
> > occupies 9 bytes only. That's because the largest basic element is u64
> > i.e. 8 bytes. Normally this trailing padding is not an issue, unless
> > you have an array of such a struct or if it is a member of another
> > struct. If you want to get rid of that padding, you need to use
> > __packed again (which of course would make all subsequent instances of
> > that structure in your array completely misaligned too).
> >
> > Two odd exceptions with the old ABI on ARM:
> >
> > - The alignment of a 64-bit value is always 4 bytes not 8.
> >
> > - The size of all structures are always rounded up to a 4-byte boundary,
> > irrespective of their content.
> >
> > If you fall into none of the above issues, then you don't need any
> > __packed, period.
>
> We don't fall into any of these cases, and therefore as you say, we
> don't need packed. Arnd and I have both explained this. So why do you
> keep arguing that we do need it?
Please show me where I keep arguing that you need it?
Nicolas
On Mon, 20 Jun 2011, Alan Stern wrote:
> On Mon, 20 Jun 2011, Nicolas Pitre wrote:
>
> > > As far as I can tell, the other structures in ehci.h have
> > > ((aligned(32)) simply in order to save space, since there can be large
> > > numbers of these structures allocated.
> >
> > How can increasing the alignment to 32 bytes save space?
>
> No, no -- the alignment is _decreased_ to 32 bits. Without the
> attribute the alignment would have been 64 bits.
The aligned attribute requires a byte value not a bit value.
Maybe what you meant is ((aligned(4)) ?
Nicolas
On Tuesday 21 June 2011, Nicolas Pitre wrote:
> On Mon, 20 Jun 2011, Arnd Bergmann wrote:
> This example is flawed. The DMA API documentation already forbids DMA to
> the stack because of cache line sharing issues. If you declare your
> buffer outside of the function body, the compiler can't optimize away
> the buffer store anymore, and this example works as expected without any
> memory clobber.
Ok, another example, even simpler:
int f(int *dma_buf, volatile int *mmio_reg)
{
(void) *mmio_reg; /* wait for DMA to complete */
return *dma_buf;
}
gcc-4.4, 4.5 and 4.6 all turn this into:
ldr r0, [r0, #0]
ldr r3, [r1, #0]
bx lr
which means that the dma_buf variable is dereferenced before the
volatile mmio_reg variable, which opens up a race: An interrupt may have
signalled us that a DMA is in progress, so we read a MMIO register from
the device (this is guaranteed to flush the DMA on PCI and similar buses).
If we read the dma_buf before we read the mmio register, the data we get
back may be stale.
Adding a barrier() between the two turns the assembly into the expected
ldr r3, [r1, #0]
ldr r0, [r0, #0]
bx lr
Arnd
On Mon, 20 Jun 2011, Nicolas Pitre wrote:
> > We don't fall into any of these cases, and therefore as you say, we
> > don't need packed. Arnd and I have both explained this. So why do you
> > keep arguing that we do need it?
>
> Please show me where I keep arguing that you need it?
Not explicitly perhaps. But you did write:
> Doesn't mean that because it used to work that it is strictly correct.
> Wouldn't be the first time that a GCC upgrade broke the kernel because
> the kernel wasn't describing things properly enough.
which strongly implies that "packed" is needed. You also wrote:
> Yes, but that's a consequence of not being able to access those fields
> in their naturally aligned position anymore. Hence the addition of the
> align attribute to tell the compiler that we know that the structure is
> still aligned to a certain degree letting the compiler to avoid
> byte-oriented instructions when possible.
which is predicated on the assumption that "packed" is needed.
Alan Stern
On Mon, 20 Jun 2011, Nicolas Pitre wrote:
> On Mon, 20 Jun 2011, Alan Stern wrote:
>
> > On Mon, 20 Jun 2011, Nicolas Pitre wrote:
> >
> > > > As far as I can tell, the other structures in ehci.h have
> > > > ((aligned(32)) simply in order to save space, since there can be large
> > > > numbers of these structures allocated.
> > >
> > > How can increasing the alignment to 32 bytes save space?
> >
> > No, no -- the alignment is _decreased_ to 32 bits. Without the
> > attribute the alignment would have been 64 bits.
>
> The aligned attribute requires a byte value not a bit value.
> Maybe what you meant is ((aligned(4)) ?
Ah, very good point! No, I meant ((aligned(32))). Do "grep aligned
drivers/usb/host/ehci.h" and you'll see.
So my understanding was wrong; these structure really are being forced
to a strict alignment. And indeed, now that I go back and look at the
EHCI spec, it turns out that this alignment is required by the
hardware.
Okay, so this digression was irrelevant to our discussion. Forget I
mentioned it...
Alan Stern
On Tue, 21 Jun 2011, Alan Stern wrote:
> On Mon, 20 Jun 2011, Nicolas Pitre wrote:
>
> > > We don't fall into any of these cases, and therefore as you say, we
> > > don't need packed. Arnd and I have both explained this. So why do you
> > > keep arguing that we do need it?
> >
> > Please show me where I keep arguing that you need it?
>
> Not explicitly perhaps. But you did write:
>
> > Doesn't mean that because it used to work that it is strictly correct.
> > Wouldn't be the first time that a GCC upgrade broke the kernel because
> > the kernel wasn't describing things properly enough.
>
> which strongly implies that "packed" is needed. You also wrote:
In this case ...
> > Yes, but that's a consequence of not being able to access those fields
> > in their naturally aligned position anymore. Hence the addition of the
> > align attribute to tell the compiler that we know that the structure is
> > still aligned to a certain degree letting the compiler to avoid
> > byte-oriented instructions when possible.
>
> which is predicated on the assumption that "packed" is needed.
... and also in this case, I was talking about proper use of the packed
attribute in general, not at all about a specific case. I wanted to
provide a broader view to some people who expressed doubts and
misunderstanding in the hope that the archive could keep this knowledge
base available.
I apologize if that wasn't clear to you.
Nicolas
Hello,
On 21.06.2011 22:41, Nicolas Pitre wrote:
> ... and also in this case, I was talking about proper use of the packed
> attribute in general, not at all about a specific case. I wanted to
> provide a broader view to some people who expressed doubts and
> misunderstanding in the hope that the archive could keep this knowledge
> base available.
>
> I apologize if that wasn't clear to you.
If you meant the abomination writing one, thanks, but I never expressed
doubts or misunderstandings.
I said doubts may arise because the standard (C99) doesn't say anything
about if or how structs are packed (or aligned).
Alexander
Am 20.06.2011 22:07, schrieb Arnd Bergmann:
> On Monday 20 June 2011 19:39:34 Alexander Holler wrote:
>> That packed without an additional aligned() caused errors on ARM with
>> gcc 4.6 is another problem which got (currently) fixed by removing packed.
>
> Packed caused errors because it is *wrong*. The code as it was used undefined
> behavior in the language.
I don't see why using just packed was wrong. The problem occured because
the latest gcc now uses or inforces aligned(1) for a packed struct
without any aligned and because of that the assignment in readl() is
done byte by byte. I'm missing the required arm knowledge and experience
to discuss this further, I don't have a reason to look further into that
and never wanted to make any judgement about the cause.
>> But this introduces imho doubts and uncertainty about if padding bytes
>> could be between the members, therefore I would prefer to use packed
>> with aligned instead of removing the packed.
>
> Packing was never an issue here, please stop talking about it.
Sorry, I never wanted to talk about the issue itself (I've already said
that), I just wanted to bring in some additional clarity for people
looking at the code.
I think if there is a packed,aligned(4) most people reading that are
able to imaging how the struct looks like, whereas nothing (without
packed) might leave doubts which than requires to read compiler docs or
the generated code, if one searches a problem in that area.
Maybe my english is that bad that nobody understood that.
But it's ok. For me, that discussion was long over, two people already
said that they prefer the struct without any packed.
About the background:
I've posted that patch, because I though I might have been the source of
the removal of packed instead of using packed along with aligned,
because I first posted such (removing the packed) at the mailing list
for u-boot and only later on thought that using what was hinted to me
over a third person (packed, aligned(4), which means the one who
originally found and fixed the problem used packed, aligned(4) too)
might be better (what I than posted there too).
Sorry for becoming that verbose, I normally don't gabble that much and I
would like it if I never would have posted that silly patch.
Regards,
Alexander
On Thu, 23 Jun 2011, Alexander Holler wrote:
> Sorry, I never wanted to talk about the issue itself (I've already said
> that), I just wanted to bring in some additional clarity for people
> looking at the code.
>
> I think if there is a packed,aligned(4) most people reading that are
> able to imaging how the struct looks like, whereas nothing (without
> packed) might leave doubts which than requires to read compiler docs or
> the generated code, if one searches a problem in that area.
I disagree. If there are no annotations at all (no packed), there
should be no doubts. The compiler will add padding wherever it is
needed for internal alignment and perhaps also at the end of the
structure. Nowhere else.
Alan Stern
Am 23.06.2011 16:25, schrieb Alan Stern:
> On Thu, 23 Jun 2011, Alexander Holler wrote:
>
>> Sorry, I never wanted to talk about the issue itself (I've already said
>> that), I just wanted to bring in some additional clarity for people
>> looking at the code.
>>
>> I think if there is a packed,aligned(4) most people reading that are
>> able to imaging how the struct looks like, whereas nothing (without
>> packed) might leave doubts which than requires to read compiler docs or
>> the generated code, if one searches a problem in that area.
>
> I disagree. If there are no annotations at all (no packed), there
> should be no doubts. The compiler will add padding wherever it is
> needed for internal alignment and perhaps also at the end of the
> structure. Nowhere else.
I agree to disagree but I assume thats ok. ;)
Let me finally add some maybe interesting or informational points for
those who are working or examing the issue and/or who might be involved
in other discussions on the reason for removing the packed:
- I didn't have any problems booting from ehci with kernels compiled
with gcc 4.6 on armv5 (or x86*).
- 2.6.38.4 (and below) compiled with gcc 4.6 booted from ehci (on a
classic beagleboard c4, armv7), whereas everything from 2.6.38.5 upwards
didn't (same compiler, same config). I've discovered that before having
seen that this might be the issue with the packed, therefor I haven't
tested if 2.6.38.5 might work without a packed and have just used gcc
4.5.x for 2.6.38.x. I have tested that a 2,6,39.x compiled with gcc 4.6
and with a removed packed boots from ehci on the beagleboard, so the
patch which removes the packed might be a candidate for the stable tree.
The reason why booting from ehci stopped with 2.6.38.5+ (gcc 4.6) might
be interesting for someone. Looking at the git log I haven't seen
something special and I don't know why anything below 2.6.38.5 worked
with gcc 4.6 and the packed.
- I don't like the idea that every member of every packed struct
(without an aligned) might be handled byte by byte. It might be
necessary but I still don't like it and would prefer the old behaviour
of gcc. I've added this point just to express my personal humble opinion
and I don't want to get involved in a discussion on that topic. ;)
I've just got involved on that topic by accident and never have had a
real reason to do something there (I've done that just for fun).
Therefore I now prefer to disappear, which means there is absolutely no
reason to respond (to me) or to explain anything to me.
Regards and sorry if I wasted someones time,
Alexander