2009-03-04 13:51:25

by Johannes Weiner

[permalink] [raw]
Subject: [patch -v2] flat: fix data sections alignment

From: Oskar Schirmer <[email protected]>

The flat loader uses an architecture's flat_stack_align() to align the
stack but assumes word-alignment is enough for the data sections.

However, on the Xtensa S6000 we have registers up to 128bit width
which can be used from userspace and therefor need userspace stack and
data-section alignment of at least this size.

This patch drops flat_stack_align() and uses the same alignment that
is required for slab caches, ARCH_SLAB_MINALIGN, or wordsize if it's
not defined by the architecture.

It also fixes m32r which was obviously kaput, aligning an
uninitialized stack entry instead of the stack pointer.

Signed-off-by: Oskar Schirmer <[email protected]>
Cc: David Howells <[email protected]>
Cc: Russell King <[email protected]>
Cc: Bryan Wu <[email protected]>
Cc: Geert Uytterhoeven <[email protected]>
Cc: Paul Mundt <[email protected]>
Cc: Greg Ungerer <[email protected]>
Signed-off-by: Johannes Weiner <[email protected]>
---
arch/arm/include/asm/flat.h | 3 ---
arch/blackfin/include/asm/flat.h | 1 -
arch/h8300/include/asm/flat.h | 1 -
arch/m68k/include/asm/flat.h | 1 -
arch/sh/include/asm/flat.h | 1 -
fs/binfmt_flat.c | 37 ++++++++++++++++++++++---------------
include/asm-m32r/flat.h | 1 -
7 files changed, 22 insertions(+), 23 deletions(-)

Paul, please note that on sh ARCH_SLAB_MINALIGN is defined to be 8
while the userspace stack was aligned to 4 before. I suppose aligning
the stack (and data sections) to 8 as well is the right thing...?

version 2:
Align ARCH_SLAB_MINALIGN if defined, fall back to wordsize, drop all
the flat_stack_align()s as they mean the same

--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -41,6 +41,7 @@
#include <asm/uaccess.h>
#include <asm/unaligned.h>
#include <asm/cacheflush.h>
+#include <asm/page.h>

/****************************************************************************/

@@ -54,6 +55,12 @@
#define DBG_FLT(a...)
#endif

+#ifdef ARCH_SLAB_MINALIGN
+#define FLAT_DATA_ALIGN (ARCH_SLAB_MINALIGN)
+#else
+#define FLAT_DATA_ALIGN (sizeof(void *))
+#endif
+
#define RELOC_FAILED 0xff00ff01 /* Relocation incorrect somewhere */
#define UNLOADED_LIB 0x7ff000ff /* Placeholder for unused library */

@@ -114,20 +121,18 @@ static unsigned long create_flat_tables(
int envc = bprm->envc;
char uninitialized_var(dummy);

- sp = (unsigned long *) ((-(unsigned long)sizeof(char *))&(unsigned long) p);
-
- sp -= envc+1;
- envp = sp;
- sp -= argc+1;
- argv = sp;
+ sp = (unsigned long *)p;
+ sp -= (envc + argc + 2) + 1 + (flat_argvp_envp_on_stack() ? 2 : 0);
+ sp = (unsigned long *) ((unsigned long)sp & -FLAT_DATA_ALIGN);
+ argv = sp + 1 + (flat_argvp_envp_on_stack() ? 2 : 0);
+ envp = argv + (argc + 1);

- flat_stack_align(sp);
if (flat_argvp_envp_on_stack()) {
- --sp; put_user((unsigned long) envp, sp);
- --sp; put_user((unsigned long) argv, sp);
+ put_user((unsigned long) envp, sp + 2);
+ put_user((unsigned long) argv, sp + 1);
}

- put_user(argc,--sp);
+ put_user(argc,sp);
current->mm->arg_start = (unsigned long) p;
while (argc-->0) {
put_user((unsigned long) p, argv++);
@@ -558,7 +563,8 @@ static int load_flat_file(struct linux_b
ret = realdatastart;
goto err;
}
- datapos = realdatastart + MAX_SHARED_LIBS * sizeof(unsigned long);
+ datapos = ALIGN(realdatastart + MAX_SHARED_LIBS * sizeof(unsigned long),
+ FLAT_DATA_ALIGN);

DBG_FLT("BINFMT_FLAT: Allocated data+bss+stack (%d bytes): %x\n",
(int)(data_len + bss_len + stack_len), (int)datapos);
@@ -604,9 +610,10 @@ static int load_flat_file(struct linux_b
}

realdatastart = textpos + ntohl(hdr->data_start);
- datapos = realdatastart + MAX_SHARED_LIBS * sizeof(unsigned long);
- reloc = (unsigned long *) (textpos + ntohl(hdr->reloc_start) +
- MAX_SHARED_LIBS * sizeof(unsigned long));
+ datapos = ALIGN(realdatastart + MAX_SHARED_LIBS * sizeof(unsigned long),
+ FLAT_DATA_ALIGN);
+
+ reloc = (unsigned long *) (datapos+(ntohl(hdr->reloc_start)-text_len));
memp = textpos;
memp_size = len;
#ifdef CONFIG_BINFMT_ZFLAT
@@ -854,7 +861,7 @@ static int load_flat_binary(struct linux
stack_len = TOP_OF_ARGS - bprm->p; /* the strings */
stack_len += (bprm->argc + 1) * sizeof(char *); /* the argv array */
stack_len += (bprm->envc + 1) * sizeof(char *); /* the envp array */
-
+ stack_len += FLAT_DATA_ALIGN;

res = load_flat_file(bprm, &libinfo, 0, &stack_len);
if (res > (unsigned long)-4096)
--- a/arch/arm/include/asm/flat.h
+++ b/arch/arm/include/asm/flat.h
@@ -5,9 +5,6 @@
#ifndef __ARM_FLAT_H__
#define __ARM_FLAT_H__

-/* An odd number of words will be pushed after this alignment, so
- deliberately misalign the value. */
-#define flat_stack_align(sp) sp = (void *)(((unsigned long)(sp) - 4) | 4)
#define flat_argvp_envp_on_stack() 1
#define flat_old_ram_flag(flags) (flags)
#define flat_reloc_valid(reloc, size) ((reloc) <= (size))
--- a/arch/blackfin/include/asm/flat.h
+++ b/arch/blackfin/include/asm/flat.h
@@ -10,7 +10,6 @@

#include <asm/unaligned.h>

-#define flat_stack_align(sp) /* nothing needed */
#define flat_argvp_envp_on_stack() 0
#define flat_old_ram_flag(flags) (flags)

--- a/arch/h8300/include/asm/flat.h
+++ b/arch/h8300/include/asm/flat.h
@@ -5,7 +5,6 @@
#ifndef __H8300_FLAT_H__
#define __H8300_FLAT_H__

-#define flat_stack_align(sp) /* nothing needed */
#define flat_argvp_envp_on_stack() 1
#define flat_old_ram_flag(flags) 1
#define flat_reloc_valid(reloc, size) ((reloc) <= (size))
--- a/arch/m68k/include/asm/flat.h
+++ b/arch/m68k/include/asm/flat.h
@@ -5,7 +5,6 @@
#ifndef __M68KNOMMU_FLAT_H__
#define __M68KNOMMU_FLAT_H__

-#define flat_stack_align(sp) /* nothing needed */
#define flat_argvp_envp_on_stack() 1
#define flat_old_ram_flag(flags) (flags)
#define flat_reloc_valid(reloc, size) ((reloc) <= (size))
--- a/arch/sh/include/asm/flat.h
+++ b/arch/sh/include/asm/flat.h
@@ -12,7 +12,6 @@
#ifndef __ASM_SH_FLAT_H
#define __ASM_SH_FLAT_H

-#define flat_stack_align(sp) /* nothing needed */
#define flat_argvp_envp_on_stack() 0
#define flat_old_ram_flag(flags) (flags)
#define flat_reloc_valid(reloc, size) ((reloc) <= (size))
--- a/include/asm-m32r/flat.h
+++ b/include/asm-m32r/flat.h
@@ -12,7 +12,6 @@
#ifndef __ASM_M32R_FLAT_H
#define __ASM_M32R_FLAT_H

-#define flat_stack_align(sp) (*sp += (*sp & 3 ? (4 - (*sp & 3)): 0))
#define flat_argvp_envp_on_stack() 0
#define flat_old_ram_flag(flags) (flags)
#define flat_set_persistent(relval, p) 0


2009-03-04 18:04:17

by Mike Frysinger

[permalink] [raw]
Subject: Re: [patch -v2] flat: fix data sections alignment

On Wed, Mar 4, 2009 at 08:51, Johannes Weiner wrote:
> The flat loader uses an architecture's flat_stack_align() to align the
> stack but assumes word-alignment is enough for the data sections.
>
> However, on the Xtensa S6000 we have registers up to 128bit width
> which can be used from userspace and therefor need userspace stack and
> data-section alignment of at least this size.

could this perhaps be a gcc problem ? x86 has a similar problem with
sse and they addressed it with a function attribute. after all, just
because your stack started out 128bit aligned doesnt mean gcc will
keep it that way when calling other functions. so having the stack
start out aligned would only "fix" the stack for the application's
entry point right (which would in practice bubble up to main()) ? so
you'd be right back where you started ...
-mike

2009-03-04 19:34:35

by Johannes Weiner

[permalink] [raw]
Subject: Re: [patch -v2] flat: fix data sections alignment

On Wed, Mar 04, 2009 at 01:04:00PM -0500, Mike Frysinger wrote:
> On Wed, Mar 4, 2009 at 08:51, Johannes Weiner wrote:
> > The flat loader uses an architecture's flat_stack_align() to align the
> > stack but assumes word-alignment is enough for the data sections.
> >
> > However, on the Xtensa S6000 we have registers up to 128bit width
> > which can be used from userspace and therefor need userspace stack and
> > data-section alignment of at least this size.
>
> could this perhaps be a gcc problem ? x86 has a similar problem with
> sse and they addressed it with a function attribute. after all, just
> because your stack started out 128bit aligned doesnt mean gcc will
> keep it that way when calling other functions. so having the stack
> start out aligned would only "fix" the stack for the application's
> entry point right (which would in practice bubble up to main()) ? so
> you'd be right back where you started ...

gcc generates sp changes only ever in multiples of 16 deltas, I just
checked it again with various amounts of stack variables.

The stack frames allocate themselves with an ENTRY instruction and the
generated code I read here allocates stack frames of n * 16 bytes.

So we are good to go as long as the initial stack frame is properly
aligned.

> -mike

Hannes

2009-03-04 20:00:37

by Mike Frysinger

[permalink] [raw]
Subject: Re: [patch -v2] flat: fix data sections alignment

On Wed, Mar 4, 2009 at 14:33, Johannes Weiner wrote:
> On Wed, Mar 04, 2009 at 01:04:00PM -0500, Mike Frysinger wrote:
>> On Wed, Mar 4, 2009 at 08:51, Johannes Weiner wrote:
>> > The flat loader uses an architecture's flat_stack_align() to align the
>> > stack but assumes word-alignment is enough for the data sections.
>> >
>> > However, on the Xtensa S6000 we have registers up to 128bit width
>> > which can be used from userspace and therefor need userspace stack and
>> > data-section alignment of at least this size.
>>
>> could this perhaps be a gcc problem ? ?x86 has a similar problem with
>> sse and they addressed it with a function attribute. ?after all, just
>> because your stack started out 128bit aligned doesnt mean gcc will
>> keep it that way when calling other functions. ?so having the stack
>> start out aligned would only "fix" the stack for the application's
>> entry point right (which would in practice bubble up to main()) ? ?so
>> you'd be right back where you started ...
>
> gcc generates sp changes only ever in multiples of 16 deltas, I just
> checked it again with various amounts of stack variables.
>
> The stack frames allocate themselves with an ENTRY instruction and the
> generated code I read here allocates stack frames of n * 16 bytes.
>
> So we are good to go as long as the initial stack frame is properly
> aligned.

throwing a few random cases at gcc isnt really a good way to validate.
this would have worked for x86 too with older versions. only when
common code in later gcc versions got more aggressive with stack
packing did people notice the issue.

so, lets look at the authoritative place: the gcc source code for xtensa

$ grep define.*STACK_BOUNDARY -B 2 gcc/config/xtensa/*.h
xtensa.h-/* Align stack frames on 128 bits for Xtensa. This is necessary for
xtensa.h- 128-bit datatypes defined in TIE (e.g., for Vectra). */
xtensa.h:#define STACK_BOUNDARY 128

ok, now i believe that forcing a stack alignment of 128bits in the
kernel is correct ;)
-mike

2009-03-04 20:14:13

by Johannes Weiner

[permalink] [raw]
Subject: Re: [patch -v2] flat: fix data sections alignment

On Wed, Mar 04, 2009 at 03:00:25PM -0500, Mike Frysinger wrote:
> On Wed, Mar 4, 2009 at 14:33, Johannes Weiner wrote:
> > On Wed, Mar 04, 2009 at 01:04:00PM -0500, Mike Frysinger wrote:
> >> On Wed, Mar 4, 2009 at 08:51, Johannes Weiner wrote:
> >> > The flat loader uses an architecture's flat_stack_align() to align the
> >> > stack but assumes word-alignment is enough for the data sections.
> >> >
> >> > However, on the Xtensa S6000 we have registers up to 128bit width
> >> > which can be used from userspace and therefor need userspace stack and
> >> > data-section alignment of at least this size.
> >>
> >> could this perhaps be a gcc problem ? ?x86 has a similar problem with
> >> sse and they addressed it with a function attribute. ?after all, just
> >> because your stack started out 128bit aligned doesnt mean gcc will
> >> keep it that way when calling other functions. ?so having the stack
> >> start out aligned would only "fix" the stack for the application's
> >> entry point right (which would in practice bubble up to main()) ? ?so
> >> you'd be right back where you started ...
> >
> > gcc generates sp changes only ever in multiples of 16 deltas, I just
> > checked it again with various amounts of stack variables.
> >
> > The stack frames allocate themselves with an ENTRY instruction and the
> > generated code I read here allocates stack frames of n * 16 bytes.
> >
> > So we are good to go as long as the initial stack frame is properly
> > aligned.
>
> throwing a few random cases at gcc isnt really a good way to validate.
> this would have worked for x86 too with older versions. only when
> common code in later gcc versions got more aggressive with stack
> packing did people notice the issue.
>
> so, lets look at the authoritative place: the gcc source code for xtensa
>
> $ grep define.*STACK_BOUNDARY -B 2 gcc/config/xtensa/*.h
> xtensa.h-/* Align stack frames on 128 bits for Xtensa. This is necessary for
> xtensa.h- 128-bit datatypes defined in TIE (e.g., for Vectra). */
> xtensa.h:#define STACK_BOUNDARY 128
>
> ok, now i believe that forcing a stack alignment of 128bits in the
> kernel is correct ;)

Now I do too. Heh.

Seriously, thanks for fishing this out. Is this an Ack? ;)

> -mike

Hannes

2009-03-04 21:48:19

by Mike Frysinger

[permalink] [raw]
Subject: Re: [patch -v2] flat: fix data sections alignment

On Wed, Mar 4, 2009 at 08:51, Johannes Weiner wrote:
> --- a/fs/binfmt_flat.c
> +++ b/fs/binfmt_flat.c
> +#ifdef ARCH_SLAB_MINALIGN
> +#define FLAT_DATA_ALIGN ? ? ? ?(ARCH_SLAB_MINALIGN)
> +#else
> +#define FLAT_DATA_ALIGN ? ? ? ?(sizeof(void *))
> +#endif
> ...
> + sp = (unsigned long *) ((unsigned long)sp & -FLAT_DATA_ALIGN);
> ...
> - datapos = realdatastart + MAX_SHARED_LIBS * sizeof(unsigned long);
> + datapos = ALIGN(realdatastart + MAX_SHARED_LIBS * sizeof(unsigned long),
> + FLAT_DATA_ALIGN);
> ...
> - datapos = realdatastart + MAX_SHARED_LIBS * sizeof(unsigned long);
> + datapos = ALIGN(realdatastart + MAX_SHARED_LIBS * sizeof(unsigned long),
> + FLAT_DATA_ALIGN);

why not make FLAT_DATA_ALIGN into a macro ? then it'd naturally
follow the existing ALIGN() behavior.

> - ? ? ? sp = (unsigned long *) ((-(unsigned long)sizeof(char *))&(unsigned long) p);
> -
> - ? ? ? sp -= envc+1;
> - ? ? ? envp = sp;
> - ? ? ? sp -= argc+1;
> - ? ? ? argv = sp;
> + ? ? ? sp = (unsigned long *)p;
> + ? ? ? sp -= (envc + argc + 2) + 1 + (flat_argvp_envp_on_stack() ? 2 : 0);
> + ? ? ? sp = (unsigned long *) ((unsigned long)sp & -FLAT_DATA_ALIGN);
> + ? ? ? argv = sp + 1 + (flat_argvp_envp_on_stack() ? 2 : 0);
> + ? ? ? envp = argv + (argc + 1);

can this be cleaned up a bit so that the argv/envp assignment happens
by using sp before aligning sp ? that would be defensive coding wrt
preventing sp adjustment falling out of line with argv initialization,
and cut down on duplicated code.

> - ? ? ? put_user(argc,--sp);
> + ? ? ? put_user(argc,sp);

might as well fix the style issues here while you're changing code ...
stick a space after the comma

> @@ -854,7 +861,7 @@ static int load_flat_binary(struct linux
> ? ? ? ?stack_len = TOP_OF_ARGS - bprm->p; ? ? ? ? ? ? /* the strings */
> ? ? ? ?stack_len += (bprm->argc + 1) * sizeof(char *); /* the argv array */
> ? ? ? ?stack_len += (bprm->envc + 1) * sizeof(char *); /* the envp array */
> -
> + ? ? ? stack_len += FLAT_DATA_ALIGN;

this seems weird. alignment is for aligning data, not padding it out
some value ...
-mike

2009-03-05 08:47:21

by Paul Mundt

[permalink] [raw]
Subject: Re: [patch -v2] flat: fix data sections alignment

On Wed, Mar 04, 2009 at 02:51:17PM +0100, Johannes Weiner wrote:
> Paul, please note that on sh ARCH_SLAB_MINALIGN is defined to be 8
> while the userspace stack was aligned to 4 before. I suppose aligning
> the stack (and data sections) to 8 as well is the right thing...?
>
This is intentional. As I noted before, the ARCH_SLAB_MINALIGN on SH
refers specifically to SH-5 (or anything implementing a 32-bit sh64 ABI),
which presently does not support nommu, but could in theory. The SH parts
that do nommu today generally need ARCH_KMALLOC_MINALIGN, but do not have
the ARCH_SLAB_MINALIGN requirement that SH-5 does.

> --- a/fs/binfmt_flat.c
> +++ b/fs/binfmt_flat.c
> @@ -41,6 +41,7 @@
> #include <asm/uaccess.h>
> #include <asm/unaligned.h>
> #include <asm/cacheflush.h>
> +#include <asm/page.h>
>
> /****************************************************************************/
>
> @@ -54,6 +55,12 @@
> #define DBG_FLT(a...)
> #endif
>
> +#ifdef ARCH_SLAB_MINALIGN
> +#define FLAT_DATA_ALIGN (ARCH_SLAB_MINALIGN)
> +#else
> +#define FLAT_DATA_ALIGN (sizeof(void *))
> +#endif
> +
As it's not entirely obvious what this is used for outside of the slab
context, you will want to have a comment here explaining the situation,
and particularly what the implication for stack alignment is.

2009-03-05 10:55:05

by Johannes Weiner

[permalink] [raw]
Subject: Re: [patch -v2] flat: fix data sections alignment

On Wed, Mar 04, 2009 at 04:48:04PM -0500, Mike Frysinger wrote:
> On Wed, Mar 4, 2009 at 08:51, Johannes Weiner wrote:
> > --- a/fs/binfmt_flat.c
> > +++ b/fs/binfmt_flat.c
> > +#ifdef ARCH_SLAB_MINALIGN
> > +#define FLAT_DATA_ALIGN ? ? ? ?(ARCH_SLAB_MINALIGN)
> > +#else
> > +#define FLAT_DATA_ALIGN ? ? ? ?(sizeof(void *))
> > +#endif
> > ...
> > + sp = (unsigned long *) ((unsigned long)sp & -FLAT_DATA_ALIGN);
> > ...
> > - datapos = realdatastart + MAX_SHARED_LIBS * sizeof(unsigned long);
> > + datapos = ALIGN(realdatastart + MAX_SHARED_LIBS * sizeof(unsigned long),
> > + FLAT_DATA_ALIGN);
> > ...
> > - datapos = realdatastart + MAX_SHARED_LIBS * sizeof(unsigned long);
> > + datapos = ALIGN(realdatastart + MAX_SHARED_LIBS * sizeof(unsigned long),
> > + FLAT_DATA_ALIGN);
>
> why not make FLAT_DATA_ALIGN into a macro ? then it'd naturally
> follow the existing ALIGN() behavior.

datapos is aligned to the next higher bound while the stack pointer
grows down and is therefor aligned to the next lower bound. A
FLAT_DATA_ALIGN() doesn't make sense.

> > - ? ? ? sp = (unsigned long *) ((-(unsigned long)sizeof(char *))&(unsigned long) p);
> > -
> > - ? ? ? sp -= envc+1;
> > - ? ? ? envp = sp;
> > - ? ? ? sp -= argc+1;
> > - ? ? ? argv = sp;
> > + ? ? ? sp = (unsigned long *)p;
> > + ? ? ? sp -= (envc + argc + 2) + 1 + (flat_argvp_envp_on_stack() ? 2 : 0);
> > + ? ? ? sp = (unsigned long *) ((unsigned long)sp & -FLAT_DATA_ALIGN);
> > + ? ? ? argv = sp + 1 + (flat_argvp_envp_on_stack() ? 2 : 0);
> > + ? ? ? envp = argv + (argc + 1);

Hannes

2009-03-05 16:43:11

by Oskar Schirmer

[permalink] [raw]
Subject: Re: [patch -v2] flat: fix data sections alignment

On Wed, Mar 04, 2009 at 16:48:04 -0500, Mike Frysinger wrote:
> On Wed, Mar 4, 2009 at 08:51, Johannes Weiner wrote:
> > -       sp = (unsigned long *) ((-(unsigned long)sizeof(char *))&(unsigned long) p);
> > -
> > -       sp -= envc+1;
> > -       envp = sp;
> > -       sp -= argc+1;
> > -       argv = sp;
> > +       sp = (unsigned long *)p;
> > +       sp -= (envc + argc + 2) + 1 + (flat_argvp_envp_on_stack() ? 2 : 0);
> > +       sp = (unsigned long *) ((unsigned long)sp & -FLAT_DATA_ALIGN);
> > +       argv = sp + 1 + (flat_argvp_envp_on_stack() ? 2 : 0);
> > +       envp = argv + (argc + 1);
>
> can this be cleaned up a bit so that the argv/envp assignment happens
> by using sp before aligning sp ? that would be defensive coding wrt
> preventing sp adjustment falling out of line with argv initialization,
> and cut down on duplicated code.

The stack grows down and needs to be aligned when done,
i.e. where it's user space's turn. Therefor, we need to
first calculate the amount of space we need for argv/envp,
then align the result, and finally push argv/envp backward
into the reserved space. Note, that all this was done
before too, with one difference: Alignment was requested
in the middle of the calculation, which is nonsense (as
the comment in the ARM flat.h prooved).

> > @@ -854,7 +861,7 @@ static int load_flat_binary(struct linux
> >        stack_len = TOP_OF_ARGS - bprm->p;             /* the strings */
> >        stack_len += (bprm->argc + 1) * sizeof(char *); /* the argv array */
> >        stack_len += (bprm->envc + 1) * sizeof(char *); /* the envp array */
> > -
> > +       stack_len += FLAT_DATA_ALIGN;
>
> this seems weird. alignment is for aligning data, not padding it out
> some value ...

stack_len is the minimum amount of space to reserve
for the stack later on. As the stack pointer will be
aligned after pushing argv/envp (see above), we need
to reserve the additional space for maximum possible
alignment upon allocation.

Oskar