2007-08-08 14:02:26

by Jiri Kosina

[permalink] [raw]
Subject: [PATCH] [RESEND] PIE executable randomization

Hi Andrew,

below is a respin of the patch for executable code address randomization
of PIE binaries, which already went through -mm into 2.6.22, but then got
reverted because of bug reports stating that klibc binaries segfault due to
this patch. However, it turned out that this was a bug in klibc 1.4 which has
already been fixed in klibc 1.5 [1].

I have rebased the patch against 2.6.23-rc1-mm2 and fixed a BAD_ADDR
macro, which was wrong in the original patch and fix for which was
probably missed [2].

[1] http://lkml.org/lkml/2007/8/2/359
[2] http://lkml.org/lkml/2007/7/7/50

=======

This patch is using mmap()'s randomization functionality in such a way
that it maps the main executable of (specially compiled/linked -pie/-fpie)
ET_DYN binaries onto a random address (in cases in which mmap() is allowed
to perform a randomization).

The code has been extracted from Ingo's exec-shield patch
http://people.redhat.com/mingo/exec-shield/

Cc: Ingo Molnar <[email protected]>
Cc: Roland McGrath <[email protected]>
Cc: Jakub Jelinek <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Ulrich Kunitz <[email protected]>
Cc: Bret Towe <[email protected]>
Signed-off-by: Jiri Kosina <[email protected]>

arch/ia64/ia32/binfmt_elf32.c | 2 +-
fs/binfmt_elf.c | 107 ++++++++++++++++++++++++++++++++--------
2 files changed, 86 insertions(+), 23 deletions(-)

diff --git a/arch/ia64/ia32/binfmt_elf32.c b/arch/ia64/ia32/binfmt_elf32.c
index f6ae3ec..3db699b 100644
--- a/arch/ia64/ia32/binfmt_elf32.c
+++ b/arch/ia64/ia32/binfmt_elf32.c
@@ -226,7 +226,7 @@ elf32_set_personality (void)
}

static unsigned long
-elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type)
+elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type, unsigned long unused)
{
unsigned long pgoff = (eppnt->p_vaddr) & ~IA32_PAGE_MASK;

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index e45c60b..8c4b7fa 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -45,7 +45,7 @@

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
-static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int);
+static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int, unsigned long);

/*
* If we don't support core dumping, then supply a NULL so we
@@ -295,33 +295,70 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
- struct elf_phdr *eppnt, int prot, int type)
+ struct elf_phdr *eppnt, int prot, int type,
+ unsigned long total_size)
{
unsigned long map_addr;
- unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr);
+ unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
+ unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
+ addr = ELF_PAGESTART(addr);
+ size = ELF_PAGEALIGN(size);

- down_write(&current->mm->mmap_sem);
/* mmap() will return -EINVAL if given a zero size, but a
* segment with zero filesize is perfectly valid */
- if (eppnt->p_filesz + pageoffset)
- map_addr = do_mmap(filep, ELF_PAGESTART(addr),
- eppnt->p_filesz + pageoffset, prot, type,
- eppnt->p_offset - pageoffset);
- else
- map_addr = ELF_PAGESTART(addr);
+ if (!size)
+ return addr;
+
+ down_write(&current->mm->mmap_sem);
+ /*
+ * total_size is the size of the ELF (interpreter) image.
+ * The _first_ mmap needs to know the full size, otherwise
+ * randomization might put this image into an overlapping
+ * position with the ELF binary image. (since size < total_size)
+ * So we first map the 'big' image - and unmap the remainder at
+ * the end. (which unmap is needed for ELF images with holes.)
+ */
+ if (total_size) {
+ total_size = ELF_PAGEALIGN(total_size);
+ map_addr = do_mmap(filep, addr, total_size, prot, type, off);
+ if (!BAD_ADDR(map_addr))
+ do_munmap(current->mm, map_addr+size, total_size-size);
+ } else
+ map_addr = do_mmap(filep, addr, size, prot, type, off);
+
up_write(&current->mm->mmap_sem);
return(map_addr);
}

#endif /* !elf_map */

+static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
+{
+ int i, first_idx = -1, last_idx = -1;
+
+ for (i = 0; i < nr; i++) {
+ if (cmds[i].p_type == PT_LOAD) {
+ last_idx = i;
+ if (first_idx == -1)
+ first_idx = i;
+ }
+ }
+ if (first_idx == -1)
+ return 0;
+
+ return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
+ ELF_PAGESTART(cmds[first_idx].p_vaddr);
+}
+
+
/* This is much more generalized than the library routine read function,
so we keep this separate. Technically the library read function
is only provided so that we can read a.out libraries that have
an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
- struct file *interpreter, unsigned long *interp_load_addr)
+ struct file *interpreter, unsigned long *interp_map_addr,
+ unsigned long no_base)
{
struct elf_phdr *elf_phdata;
struct elf_phdr *eppnt;
@@ -329,6 +366,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
int load_addr_set = 0;
unsigned long last_bss = 0, elf_bss = 0;
unsigned long error = ~0UL;
+ unsigned long total_size;
int retval, i, size;

/* First of all, some simple consistency checks */
@@ -367,6 +405,12 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
goto out_close;
}

+ total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
+ if (!total_size) {
+ error = -EINVAL;
+ goto out_close;
+ }
+
eppnt = elf_phdata;
for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
if (eppnt->p_type == PT_LOAD) {
@@ -384,9 +428,14 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
vaddr = eppnt->p_vaddr;
if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
elf_type |= MAP_FIXED;
+ else if (no_base && interp_elf_ex->e_type == ET_DYN)
+ load_addr = -vaddr;

map_addr = elf_map(interpreter, load_addr + vaddr,
- eppnt, elf_prot, elf_type);
+ eppnt, elf_prot, elf_type, total_size);
+ total_size = 0;
+ if (!*interp_map_addr)
+ *interp_map_addr = map_addr;
error = map_addr;
if (BAD_ADDR(map_addr))
goto out_close;
@@ -452,8 +501,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
goto out_close;
}

- *interp_load_addr = load_addr;
- error = ((unsigned long)interp_elf_ex->e_entry) + load_addr;
+ error = load_addr;

out_close:
kfree(elf_phdata);
@@ -550,7 +598,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
int elf_exec_fileno;
int retval, i;
unsigned int size;
- unsigned long elf_entry, interp_load_addr = 0;
+ unsigned long elf_entry;
+ unsigned long interp_load_addr = 0;
unsigned long start_code, end_code, start_data, end_data;
unsigned long reloc_func_desc = 0;
char passed_fileno[6];
@@ -814,9 +863,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
current->mm->start_stack = bprm->p;

/* Now we do a little grungy work by mmaping the ELF image into
- the correct location in memory. At this point, we assume that
- the image should be loaded at fixed address, not at a variable
- address. */
+ the correct location in memory. */
for(i = 0, elf_ppnt = elf_phdata;
i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
int elf_prot = 0, elf_flags;
@@ -870,11 +917,15 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
* default mmap base, as well as whatever program they
* might try to exec. This is because the brk will
* follow the loader, and is not movable. */
+#ifdef CONFIG_X86
+ load_bias = 0;
+#else
load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
+#endif
}

error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
- elf_prot, elf_flags);
+ elf_prot, elf_flags,0);
if (BAD_ADDR(error)) {
send_sig(SIGKILL, current, 0);
retval = IS_ERR((void *)error) ?
@@ -950,13 +1001,25 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
}

if (elf_interpreter) {
- if (interpreter_type == INTERPRETER_AOUT)
+ if (interpreter_type == INTERPRETER_AOUT) {
elf_entry = load_aout_interp(&loc->interp_ex,
interpreter);
- else
+ } else {
+ unsigned long interp_map_addr; /* unused */
+
elf_entry = load_elf_interp(&loc->interp_elf_ex,
interpreter,
- &interp_load_addr);
+ &interp_map_addr,
+ load_bias);
+ if (!IS_ERR((void *)elf_entry)) {
+ /*
+ * load_elf_interp() returns relocation
+ * adjustment
+ */
+ interp_load_addr = elf_entry;
+ elf_entry += loc->interp_elf_ex.e_entry;
+ }
+ }
if (BAD_ADDR(elf_entry)) {
force_sig(SIGSEGV, current);
retval = IS_ERR((void *)elf_entry) ?


2007-08-14 20:07:43

by Jakub Jelinek

[permalink] [raw]
Subject: Re: [PATCH] [RESEND] PIE executable randomization

On Wed, Aug 08, 2007 at 04:03:07PM +0200, Jiri Kosina wrote:
> @@ -870,11 +917,15 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
> * default mmap base, as well as whatever program they
> * might try to exec. This is because the brk will
> * follow the loader, and is not movable. */
> +#ifdef CONFIG_X86
> + load_bias = 0;
> +#else
> load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
> +#endif
> }
>
> error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
> - elf_prot, elf_flags);
> + elf_prot, elf_flags,0);
> if (BAD_ADDR(error)) {
> send_sig(SIGKILL, current, 0);
> retval = IS_ERR((void *)error) ?

If I'm reading the above hunk correctly, this means we will randomize
all PIEs and even all dynamic linkers invoked as executables on i?86 and
x86_64, and on the rest of arches we won't randomize at all, instead
load ET_DYN objects at ELF_ET_DYN_BASE address.

But I don't see anything i?86/x86_64 specific on this.

What would make much more sense to me would be conditionalizing on
whether we are loading a dynamic linker (in which case loading it
at ELF_ET_DYN_BASE is desirable) or not (PIEs, ...; and for PIEs we
want to randomize on all architectures).

So something like
if (elf_interpreter)
load_bias = 0;
else
/* Probably dynamic linker invoked as
/lib*/ld*so* program args - load at
ELF_ET_DYN_BASE. */
load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
instead of
#ifdef CONFIG_X86
load_bias = 0;
#else
load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif

Jakub

2007-08-14 20:40:15

by Jiri Kosina

[permalink] [raw]
Subject: Re: [PATCH] [RESEND] PIE executable randomization

(added Arjan to CC, as he has been working on the kernel part of the
randomization previously)

On Tue, 14 Aug 2007, Jakub Jelinek wrote:

> If I'm reading the above hunk correctly, this means we will randomize
> all PIEs and even all dynamic linkers invoked as executables on i?86 and
> x86_64, and on the rest of arches we won't randomize at all, instead
> load ET_DYN objects at ELF_ET_DYN_BASE address. But I don't see anything
> i?86/x86_64 specific on this.

Hi Jakub,

actually, it is currently arch-specific, and that's because of different
memory layouts on different archs.

It turned out recently that PIE-compiled binaries on x86_64, that perform
larger amount of brk-allocations (for example bash) will not work (but
they will work on ?86). This is because currently on ?86 the memory layout
is as follows:

[TEXT][HEAP]...[MMAP area]..[STACK]..[VDSO]

for PIE-compiled binaries, the situation is as follows (with the patch):

[MMAP area]...[TEXT][HEAP]..[STACK]..[VDSO]

which is perfectly fine (except for the non-randomized brk). However, on
x86_64, the memory layout is different:

[TEXT][HEAP][MMAP area]..[STACK]..[VDSO]

which directly shows why brk() doesn't work well here -- it very soon hits
another mmaped VMA.

I am currently thinking about the best way to address this issue -- I am
thinking about randomizing brk properly (which we want to do anyway), so
that it is placed in the area that doesn't overlap with mmap range.

> What would make much more sense to me would be conditionalizing on
> whether we are loading a dynamic linker (in which case loading it
> at ELF_ET_DYN_BASE is desirable or not (PIEs, ...; and for PIEs we
> want to randomize on all architectures).

Yes, I agree -- when we sort out the memory layout problems.

Thanks,

--
Jiri Kosina
SUSE Labs

2007-08-14 23:20:26

by Jiri Kosina

[permalink] [raw]
Subject: Re: [PATCH] [RESEND] PIE executable randomization

On Tue, 14 Aug 2007, Jiri Kosina wrote:

> It turned out recently that PIE-compiled binaries on x86_64, that
> perform larger amount of brk-allocations (for example bash) will not
> work (but they will work on ?86). This is because currently on ?86 the
> memory layout is as follows:

(Andi added to CC)

The following patch fixes the brk-allocation problems on x86_64 with code
randomization patch on PIE-compiled binaries. Is anyone aware of any
potential disaster it might cause somewhere please?

If not -- Andrew, could you apply it on top of
pie-executable-randomization.patch please? Thanks.



From: Jiri Kosina <[email protected]>

X86_64: add flexmmap support

This patch adds flexible-mmap support for x86_64 and brings the address
space layout closer to the "new" i?86 address space layout. Using the
legacy layout is still possible by

- ADDR_COMPAT_LAYOUT personality
- having unlimited resource limit for stack
- legacy_va_layout sysctl setting

This corresponds to the ?86 behavior.

Flexible-mmap support is necessary for establishing proper mapping when
performing executable code randomization for PIE-compiled binaries,
otherwise non-randomized brk, which is immediately following the code,
might not have enough free space.

Signed-off-by: Jiri Kosina <[email protected]>

arch/x86_64/mm/mmap.c | 107 ++++++++++++++++++++++++++++++++++++++++++-------
1 files changed, 92 insertions(+), 15 deletions(-)

diff --git a/arch/x86_64/mm/mmap.c b/arch/x86_64/mm/mmap.c
index 80bba0d..a5e658c 100644
--- a/arch/x86_64/mm/mmap.c
+++ b/arch/x86_64/mm/mmap.c
@@ -1,29 +1,106 @@
-/* Copyright 2005 Andi Kleen, SuSE Labs.
- * Licensed under GPL, v.2
+/*
+ * linux/arch/x86-64/mm/mmap.c
+ *
+ * flexible mmap layout support
+ *
+ * Based on code by Ingo Molnar and Andi Kleen, copyrighted
+ * as follows:
+ *
+ * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
+ * All Rights Reserved.
+ * Copyright 2005 Andi Kleen, SuSE Labs.
+ * Copyright 2007 Jiri Kosina, SuSE Labs.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
*/
+
+#include <linux/personality.h>
#include <linux/mm.h>
-#include <linux/sched.h>
#include <linux/random.h>
+#include <linux/limits.h>
+#include <linux/sched.h>
#include <asm/ia32.h>

-/* Notebook: move the mmap code from sys_x86_64.c over here. */
+/*
+ * Top of mmap area (just below the process stack).
+ *
+ * Leave an at least ~128 MB hole.
+ */
+#define MIN_GAP (128*1024*1024)
+#define MAX_GAP (TASK_SIZE/6*5)

-void arch_pick_mmap_layout(struct mm_struct *mm)
+static inline unsigned long mmap_base(void)
+{
+ unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
+
+ if (gap < MIN_GAP)
+ gap = MIN_GAP;
+ else if (gap > MAX_GAP)
+ gap = MAX_GAP;
+
+ return TASK_SIZE - (gap & PAGE_MASK);
+}
+
+static inline int mmap_is_legacy(void)
{
#ifdef CONFIG_IA32_EMULATION
- if (current_thread_info()->flags & _TIF_IA32)
- return ia32_pick_mmap_layout(mm);
+ if (test_thread_flag(TIF_IA32))
+ return 1;
#endif
- mm->mmap_base = TASK_UNMAPPED_BASE;
+
+ if (current->personality & ADDR_COMPAT_LAYOUT)
+ return 1;
+
+ if (current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY)
+ return 1;
+
+ return sysctl_legacy_va_layout;
+}
+
+/*
+ * This function, called very early during the creation of a new
+ * process VM image, sets up which VM layout function to use:
+ */
+void arch_pick_mmap_layout(struct mm_struct *mm)
+{
+ int rnd = 0;
if (current->flags & PF_RANDOMIZE) {
/* Add 28bit randomness which is about 40bits of address space
because mmap base has to be page aligned.
- or ~1/128 of the total user VM
- (total user address space is 47bits) */
- unsigned rnd = get_random_int() & 0xfffffff;
- mm->mmap_base += ((unsigned long)rnd) << PAGE_SHIFT;
+ or ~1/128 of the total user VM
+ (total user address space is 47bits) */
+ rnd = get_random_int() & 0xfffffff;
}
- mm->get_unmapped_area = arch_get_unmapped_area;
- mm->unmap_area = arch_unmap_area;
-}

+ /*
+ * Fall back to the standard layout if the personality
+ * bit is set, or if the expected stack growth is unlimited:
+ */
+ if (mmap_is_legacy()) {
+ mm->mmap_base = TASK_UNMAPPED_BASE;
+ mm->get_unmapped_area = arch_get_unmapped_area;
+ mm->unmap_area = arch_unmap_area;
+ } else {
+ mm->mmap_base = mmap_base();
+ mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+ mm->unmap_area = arch_unmap_area_topdown;
+ if (current->flags & PF_RANDOMIZE)
+ rnd = -rnd;
+ }
+ if (current->flags & PF_RANDOMIZE) {
+ mm->mmap_base += ((long)rnd) << PAGE_SHIFT;
+ }
+}

2007-08-15 17:30:19

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH] [RESEND] PIE executable randomization

Hi,

On Wed, Aug 15, 2007 at 01:21:37AM +0200, Jiri Kosina wrote:
> The following patch fixes the brk-allocation problems on x86_64 with code
> randomization patch on PIE-compiled binaries. Is anyone aware of any
> potential disaster it might cause somewhere please?

(Adding myself to this thread...)

I've tested this on x86_64 now (the prior patch failed on brk, as
mentioned). This version passes my regression tests. I'd like to
double-check this on i386 with unlimited stack (the situation that ran
into problems back with 2.6.20's version of text ASLR).

For anyone interested, I have a few ASLR and VM checking tools here:
http://outflux.net/aslr/

Thanks,

-Kees

--
Kees Cook @outflux.net

2007-08-15 17:40:44

by Chuck Ebbert

[permalink] [raw]
Subject: Re: [PATCH] [RESEND] PIE executable randomization

On 08/14/2007 04:41 PM, Jiri Kosina wrote:
> (added Arjan to CC, as he has been working on the kernel part of the
> randomization previously)
>
> On Tue, 14 Aug 2007, Jakub Jelinek wrote:
>
>> If I'm reading the above hunk correctly, this means we will randomize
>> all PIEs and even all dynamic linkers invoked as executables on i?86 and
>> x86_64, and on the rest of arches we won't randomize at all, instead
>> load ET_DYN objects at ELF_ET_DYN_BASE address. But I don't see anything
>> i?86/x86_64 specific on this.
>
> Hi Jakub,
>
> actually, it is currently arch-specific, and that's because of different
> memory layouts on different archs.
>
> It turned out recently that PIE-compiled binaries on x86_64, that perform
> larger amount of brk-allocations (for example bash) will not work (but
> they will work on ?86). This is because currently on ?86 the memory layout
> is as follows:

But your patch is enabling randomization for x86_64, because CONFIG_X86
includes both 32 and 64 bit archs.

2007-08-15 21:04:00

by Jiri Kosina

[permalink] [raw]
Subject: Re: [PATCH] [RESEND] PIE executable randomization

On Wed, 15 Aug 2007, Chuck Ebbert wrote:

> But your patch is enabling randomization for x86_64, because CONFIG_X86
> includes both 32 and 64 bit archs.

Hi Chuck,

yes, and this is addressed by the second patch I have sent yesterday,
which enables flexmmap for x86_64.

--
Jiri Kosina
SUSE Labs