Implement a helper elf_load that wraps elf_map and performs all
of the necessary work to ensure that when "memsz > filesz"
the bytes described by "memsz > filesz" are zeroed.
Link: https://lkml.kernel.org/r/[email protected]
Reported-by: Sebastian Ott <[email protected]>
Reported-by: Thomas Weißschuh <[email protected]>
Signed-off-by: "Eric W. Biederman" <[email protected]>
---
fs/binfmt_elf.c | 111 +++++++++++++++++++++---------------------------
1 file changed, 48 insertions(+), 63 deletions(-)
Can you please test this one?
With this patch I can boot a machine, and I like the structure much
better. Overall this seems a more reviewable and safer patch although
it is almost as aggressive in the cleanups.
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 7b3d2d491407..8bea9d974361 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -110,25 +110,6 @@ static struct linux_binfmt elf_format = {
#define BAD_ADDR(x) (unlikely((unsigned long)(x) >= TASK_SIZE))
-static int set_brk(unsigned long start, unsigned long end, int prot)
-{
- start = ELF_PAGEALIGN(start);
- end = ELF_PAGEALIGN(end);
- if (end > start) {
- /*
- * Map the last of the bss segment.
- * If the header is requesting these pages to be
- * executable, honour that (ppc32 needs this).
- */
- int error = vm_brk_flags(start, end - start,
- prot & PROT_EXEC ? VM_EXEC : 0);
- if (error)
- return error;
- }
- current->mm->start_brk = current->mm->brk = end;
- return 0;
-}
-
/* We need to explicitly zero any fractional pages
after the data section (i.e. bss). This would
contain the junk from the file that should not
@@ -406,6 +387,51 @@ static unsigned long elf_map(struct file *filep, unsigned long addr,
return(map_addr);
}
+static unsigned long elf_load(struct file *filep, unsigned long addr,
+ const struct elf_phdr *eppnt, int prot, int type,
+ unsigned long total_size)
+{
+ unsigned long zero_start, zero_end;
+ unsigned long map_addr;
+
+ if (eppnt->p_filesz) {
+ map_addr = elf_map(filep, addr, eppnt, prot, type, total_size);
+ if (BAD_ADDR(map_addr))
+ return map_addr;
+ if (eppnt->p_memsz > eppnt->p_filesz) {
+ zero_start = map_addr + ELF_PAGEOFFSET(eppnt->p_vaddr) +
+ eppnt->p_filesz;
+ zero_end = map_addr + ELF_PAGEOFFSET(eppnt->p_vaddr) +
+ eppnt->p_memsz;
+
+ /* Zero the end of the last mapped page */
+ padzero(zero_start);
+ }
+ } else {
+ zero_start = ELF_PAGESTART(addr);
+ zero_end = zero_start + ELF_PAGEOFFSET(eppnt->p_vaddr) +
+ eppnt->p_memsz;
+ }
+ if (eppnt->p_memsz > eppnt->p_filesz) {
+ /*
+ * Map the last of the segment.
+ * If the header is requesting these pages to be
+ * executable, honour that (ppc32 needs this).
+ */
+ int error;
+
+ zero_start = ELF_PAGEALIGN(zero_start);
+ zero_end = ELF_PAGEALIGN(zero_end);
+
+ error = vm_brk_flags(zero_start, zero_end - zero_start,
+ prot & PROT_EXEC ? VM_EXEC : 0);
+ if (error)
+ map_addr = error;
+ }
+ return map_addr;
+}
+
+
static unsigned long total_mapping_size(const struct elf_phdr *phdr, int nr)
{
elf_addr_t min_addr = -1;
@@ -829,7 +855,6 @@ static int load_elf_binary(struct linux_binprm *bprm)
struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
struct elf_phdr *elf_property_phdata = NULL;
unsigned long elf_bss, elf_brk;
- int bss_prot = 0;
int retval, i;
unsigned long elf_entry;
unsigned long e_entry;
@@ -1040,33 +1065,6 @@ static int load_elf_binary(struct linux_binprm *bprm)
if (elf_ppnt->p_type != PT_LOAD)
continue;
- if (unlikely (elf_brk > elf_bss)) {
- unsigned long nbyte;
-
- /* There was a PT_LOAD segment with p_memsz > p_filesz
- before this one. Map anonymous pages, if needed,
- and clear the area. */
- retval = set_brk(elf_bss + load_bias,
- elf_brk + load_bias,
- bss_prot);
- if (retval)
- goto out_free_dentry;
- nbyte = ELF_PAGEOFFSET(elf_bss);
- if (nbyte) {
- nbyte = ELF_MIN_ALIGN - nbyte;
- if (nbyte > elf_brk - elf_bss)
- nbyte = elf_brk - elf_bss;
- if (clear_user((void __user *)elf_bss +
- load_bias, nbyte)) {
- /*
- * This bss-zeroing can fail if the ELF
- * file specifies odd protections. So
- * we don't check the return value
- */
- }
- }
- }
-
elf_prot = make_prot(elf_ppnt->p_flags, &arch_state,
!!interpreter, false);
@@ -1162,7 +1160,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
}
}
- error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
+ error = elf_load(bprm->file, load_bias + vaddr, elf_ppnt,
elf_prot, elf_flags, total_size);
if (BAD_ADDR(error)) {
retval = IS_ERR_VALUE(error) ?
@@ -1217,10 +1215,8 @@ static int load_elf_binary(struct linux_binprm *bprm)
if (end_data < k)
end_data = k;
k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
- if (k > elf_brk) {
- bss_prot = elf_prot;
+ if (k > elf_brk)
elf_brk = k;
- }
}
e_entry = elf_ex->e_entry + load_bias;
@@ -1232,18 +1228,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
start_data += load_bias;
end_data += load_bias;
- /* Calling set_brk effectively mmaps the pages that we need
- * for the bss and break sections. We must do this before
- * mapping in the interpreter, to make sure it doesn't wind
- * up getting placed where the bss needs to go.
- */
- retval = set_brk(elf_bss, elf_brk, bss_prot);
- if (retval)
- goto out_free_dentry;
- if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
- retval = -EFAULT; /* Nobody gets to see this, but.. */
- goto out_free_dentry;
- }
+ current->mm->start_brk = current->mm->brk = ELF_PAGEALIGN(elf_brk);
if (interpreter) {
elf_entry = load_elf_interp(interp_elf_ex,
--
2.41.0
On Mon, 25 Sep 2023, Eric W. Biederman wrote:
>
> Implement a helper elf_load that wraps elf_map and performs all
> of the necessary work to ensure that when "memsz > filesz"
> the bytes described by "memsz > filesz" are zeroed.
>
> Link: https://lkml.kernel.org/r/[email protected]
> Reported-by: Sebastian Ott <[email protected]>
> Reported-by: Thomas Weißschuh <[email protected]>
> Signed-off-by: "Eric W. Biederman" <[email protected]>
> ---
> fs/binfmt_elf.c | 111 +++++++++++++++++++++---------------------------
> 1 file changed, 48 insertions(+), 63 deletions(-)
>
> Can you please test this one?
>
That one did the trick! The arm box booted successful, ran the binaries
that were used for the repo of this issue, and ran the nolibc compiled
binaries from kselftests that initially triggered the loader issues.
Thanks,
Sebastian
On Mon, Sep 25, 2023 at 05:27:12PM +0200, Sebastian Ott wrote:
> On Mon, 25 Sep 2023, Eric W. Biederman wrote:
> >
> > Implement a helper elf_load that wraps elf_map and performs all
> > of the necessary work to ensure that when "memsz > filesz"
> > the bytes described by "memsz > filesz" are zeroed.
> >
> > Link: https://lkml.kernel.org/r/[email protected]
> > Reported-by: Sebastian Ott <[email protected]>
> > Reported-by: Thomas Wei?schuh <[email protected]>
> > Signed-off-by: "Eric W. Biederman" <[email protected]>
> > ---
> > fs/binfmt_elf.c | 111 +++++++++++++++++++++---------------------------
> > 1 file changed, 48 insertions(+), 63 deletions(-)
> >
> > Can you please test this one?
Eric thanks for doing this refactoring! This does look similar to the
earlier attempt:
https://lore.kernel.org/lkml/[email protected]/
and it's a bit easier to review.
> That one did the trick! The arm box booted successful, ran the binaries
> that were used for the repo of this issue, and ran the nolibc compiled
> binaries from kselftests that initially triggered the loader issues.
Thanks for testing! I need to dig out the other "weird" binaries (like
the mentioned ppc32 case) and see if I can get those tested too.
Pedro, are you able to test ppc64le musl libc with this patch too?
-Kees
--
Kees Cook
Kees Cook <[email protected]> writes:
> On Mon, Sep 25, 2023 at 05:27:12PM +0200, Sebastian Ott wrote:
>> On Mon, 25 Sep 2023, Eric W. Biederman wrote:
>> >
>> > Implement a helper elf_load that wraps elf_map and performs all
>> > of the necessary work to ensure that when "memsz > filesz"
>> > the bytes described by "memsz > filesz" are zeroed.
>> >
>> > Link: https://lkml.kernel.org/r/[email protected]
>> > Reported-by: Sebastian Ott <[email protected]>
>> > Reported-by: Thomas Weißschuh <[email protected]>
>> > Signed-off-by: "Eric W. Biederman" <[email protected]>
>> > ---
>> > fs/binfmt_elf.c | 111 +++++++++++++++++++++---------------------------
>> > 1 file changed, 48 insertions(+), 63 deletions(-)
>> >
>> > Can you please test this one?
>
> Eric thanks for doing this refactoring! This does look similar to the
> earlier attempt:
> https://lore.kernel.org/lkml/[email protected]/
> and it's a bit easier to review.
I need to context switch away for a while so Kees if you will
I will let you handle the rest of this one.
A couple of thoughts running through my head for anyone whose ambitious
might include cleaning up binfmt_elf.c
The elf_bss variable in load_elf_binary can be removed.
Work for a follow on patch is using my new elf_load in load_elf_interp
and possibly in load_elf_library. (More code size reduction).
An outstanding issue is if the first segment has filesz 0, and has a
randomized locations. But that is the same as today.
There is a whole question does it make sense for the elf loader
to have it's own helper vm_brk_flags in mm/mmap.c or would it
make more sense for binfmt_elf to do what binfmt_elf_fdpic does and
have everything to go through vm_mmap.
I think replacing vm_brk_flags with vm_mmap would allow fixing the
theoretical issue of filesz 0 and randomizing locations.
In this change I replaced an open coded padzero that did not clear
all of the way to the end of the page, with padzero that does.
I also stopped checking the return of padzero as there is at least
one known case where testing for failure is the wrong thing to do.
It looks like binfmt_elf_fdpic may have the proper set of tests
for when error handling can be safely completed.
I found a couple of commits in the old history
https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git,
that look very interesting in understanding this code.
commit 39b56d902bf3 ("[PATCH] binfmt_elf: clearing bss may fail")
commit c6e2227e4a3e ("[SPARC64]: Missing user access return value checks in fs/binfmt_elf.c and fs/compat.c")
commit 5bf3be033f50 ("v2.4.10.1 -> v2.4.10.2")
Looking at commit 39b56d902bf3 ("[PATCH] binfmt_elf: clearing bss may fail"):
> commit 39b56d902bf35241e7cba6cc30b828ed937175ad
> Author: Pavel Machek <[email protected]>
> Date: Wed Feb 9 22:40:30 2005 -0800
>
> [PATCH] binfmt_elf: clearing bss may fail
>
> So we discover that Borland's Kylix application builder emits weird elf
> files which describe a non-writeable bss segment.
>
> So remove the clear_user() check at the place where we zero out the bss. I
> don't _think_ there are any security implications here (plus we've never
> checked that clear_user() return value, so whoops if it is a problem).
>
> Signed-off-by: Pavel Machek <[email protected]>
> Signed-off-by: Andrew Morton <[email protected]>
> Signed-off-by: Linus Torvalds <[email protected]>
It seems pretty clear that binfmt_elf_fdpic with skipping clear_user
for non-writable segments and otherwise calling clear_user (aka padzero)
and checking it's return code is the right thing to do.
I just skipped the error checking as that avoids breaking things.
It looks like Borland's Kylix died in 2005 so it might be safe to
just consider read-only segments with memsz > filesz an error.
Looking at commit 5bf3be033f50 ("v2.4.10.1 -> v2.4.10.2") the
binfmt_elf.c bits confirm my guess that the weird structure is because
before that point binfmt_elf.c assumed there would be only a single
segment with memsz > filesz. Which is why the code was structured so
weirdly.
Looking a little farther it looks like the binfmt_elf.c was introduced
in Linux v1.0, with essentially the same structure in load_elf_binary as
it has now. Prior to that Linux hard coded support for a.out binaries
in execve. So if someone wants to add a Fixes tag it should be
"Fixes: v1.0"
Which finally explains to me why the code is so odd. For the most part
the code has only received maintenance for the last 30 years or so.
Strictly 29 years, but 30 has a better ring to it.
Anyway those are my rambling thoughts that might help someone.
For now I will be happy if we can get my elf_load helper tested
to everyone's satisfaction and merged.
Eric
Hi Eric,
kernel test robot noticed the following build warnings:
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Eric-W-Biederman/binfmt_elf-Support-segments-with-0-filesz-and-misaligned-starts/20230925-210022
base: https://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/execve
patch link: https://lore.kernel.org/r/87jzsemmsd.fsf_-_%40email.froward.int.ebiederm.org
patch subject: [PATCH] binfmt_elf: Support segments with 0 filesz and misaligned starts
config: i386-randconfig-141-20230926 (https://download.01.org/0day-ci/archive/20230926/[email protected]/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce: (https://download.01.org/0day-ci/archive/20230926/[email protected]/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Reported-by: Dan Carpenter <[email protected]>
| Closes: https://lore.kernel.org/r/[email protected]/
smatch warnings:
fs/binfmt_elf.c:431 elf_load() error: uninitialized symbol 'map_addr'.
vim +/map_addr +431 fs/binfmt_elf.c
a6409120b31666 Eric W. Biederman 2023-09-25 390 static unsigned long elf_load(struct file *filep, unsigned long addr,
a6409120b31666 Eric W. Biederman 2023-09-25 391 const struct elf_phdr *eppnt, int prot, int type,
a6409120b31666 Eric W. Biederman 2023-09-25 392 unsigned long total_size)
a6409120b31666 Eric W. Biederman 2023-09-25 393 {
a6409120b31666 Eric W. Biederman 2023-09-25 394 unsigned long zero_start, zero_end;
a6409120b31666 Eric W. Biederman 2023-09-25 395 unsigned long map_addr;
a6409120b31666 Eric W. Biederman 2023-09-25 396
a6409120b31666 Eric W. Biederman 2023-09-25 397 if (eppnt->p_filesz) {
a6409120b31666 Eric W. Biederman 2023-09-25 398 map_addr = elf_map(filep, addr, eppnt, prot, type, total_size);
a6409120b31666 Eric W. Biederman 2023-09-25 399 if (BAD_ADDR(map_addr))
a6409120b31666 Eric W. Biederman 2023-09-25 400 return map_addr;
a6409120b31666 Eric W. Biederman 2023-09-25 401 if (eppnt->p_memsz > eppnt->p_filesz) {
a6409120b31666 Eric W. Biederman 2023-09-25 402 zero_start = map_addr + ELF_PAGEOFFSET(eppnt->p_vaddr) +
a6409120b31666 Eric W. Biederman 2023-09-25 403 eppnt->p_filesz;
a6409120b31666 Eric W. Biederman 2023-09-25 404 zero_end = map_addr + ELF_PAGEOFFSET(eppnt->p_vaddr) +
a6409120b31666 Eric W. Biederman 2023-09-25 405 eppnt->p_memsz;
a6409120b31666 Eric W. Biederman 2023-09-25 406
a6409120b31666 Eric W. Biederman 2023-09-25 407 /* Zero the end of the last mapped page */
a6409120b31666 Eric W. Biederman 2023-09-25 408 padzero(zero_start);
a6409120b31666 Eric W. Biederman 2023-09-25 409 }
a6409120b31666 Eric W. Biederman 2023-09-25 410 } else {
a6409120b31666 Eric W. Biederman 2023-09-25 411 zero_start = ELF_PAGESTART(addr);
a6409120b31666 Eric W. Biederman 2023-09-25 412 zero_end = zero_start + ELF_PAGEOFFSET(eppnt->p_vaddr) +
a6409120b31666 Eric W. Biederman 2023-09-25 413 eppnt->p_memsz;
For this else path, map_addr is only set if there is an error.
a6409120b31666 Eric W. Biederman 2023-09-25 414 }
a6409120b31666 Eric W. Biederman 2023-09-25 415 if (eppnt->p_memsz > eppnt->p_filesz) {
a6409120b31666 Eric W. Biederman 2023-09-25 416 /*
a6409120b31666 Eric W. Biederman 2023-09-25 417 * Map the last of the segment.
a6409120b31666 Eric W. Biederman 2023-09-25 418 * If the header is requesting these pages to be
a6409120b31666 Eric W. Biederman 2023-09-25 419 * executable, honour that (ppc32 needs this).
a6409120b31666 Eric W. Biederman 2023-09-25 420 */
a6409120b31666 Eric W. Biederman 2023-09-25 421 int error;
a6409120b31666 Eric W. Biederman 2023-09-25 422
a6409120b31666 Eric W. Biederman 2023-09-25 423 zero_start = ELF_PAGEALIGN(zero_start);
a6409120b31666 Eric W. Biederman 2023-09-25 424 zero_end = ELF_PAGEALIGN(zero_end);
a6409120b31666 Eric W. Biederman 2023-09-25 425
a6409120b31666 Eric W. Biederman 2023-09-25 426 error = vm_brk_flags(zero_start, zero_end - zero_start,
a6409120b31666 Eric W. Biederman 2023-09-25 427 prot & PROT_EXEC ? VM_EXEC : 0);
a6409120b31666 Eric W. Biederman 2023-09-25 428 if (error)
a6409120b31666 Eric W. Biederman 2023-09-25 429 map_addr = error;
a6409120b31666 Eric W. Biederman 2023-09-25 430 }
a6409120b31666 Eric W. Biederman 2023-09-25 @431 return map_addr;
a6409120b31666 Eric W. Biederman 2023-09-25 432 }
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
On Mon, Sep 25, 2023 at 10:27:02PM -0500, Eric W. Biederman wrote:
> Kees Cook <[email protected]> writes:
>
> > On Mon, Sep 25, 2023 at 05:27:12PM +0200, Sebastian Ott wrote:
> >> On Mon, 25 Sep 2023, Eric W. Biederman wrote:
> >> >
> >> > Implement a helper elf_load that wraps elf_map and performs all
> >> > of the necessary work to ensure that when "memsz > filesz"
> >> > the bytes described by "memsz > filesz" are zeroed.
> >> >
> >> > Link: https://lkml.kernel.org/r/[email protected]
> >> > Reported-by: Sebastian Ott <[email protected]>
> >> > Reported-by: Thomas Wei?schuh <[email protected]>
> >> > Signed-off-by: "Eric W. Biederman" <[email protected]>
> >> > ---
> >> > fs/binfmt_elf.c | 111 +++++++++++++++++++++---------------------------
> >> > 1 file changed, 48 insertions(+), 63 deletions(-)
> >> >
> >> > Can you please test this one?
> >
> > Eric thanks for doing this refactoring! This does look similar to the
> > earlier attempt:
> > https://lore.kernel.org/lkml/[email protected]/
> > and it's a bit easier to review.
>
> I need to context switch away for a while so Kees if you will
> I will let you handle the rest of this one.
>
>
> A couple of thoughts running through my head for anyone whose ambitious
> might include cleaning up binfmt_elf.c
>
> The elf_bss variable in load_elf_binary can be removed.
>
> Work for a follow on patch is using my new elf_load in load_elf_interp
> and possibly in load_elf_library. (More code size reduction).
>
> An outstanding issue is if the first segment has filesz 0, and has a
> randomized locations. But that is the same as today.
>
> There is a whole question does it make sense for the elf loader
> to have it's own helper vm_brk_flags in mm/mmap.c or would it
> make more sense for binfmt_elf to do what binfmt_elf_fdpic does and
> have everything to go through vm_mmap.
>
> I think replacing vm_brk_flags with vm_mmap would allow fixing the
> theoretical issue of filesz 0 and randomizing locations.
>
>
>
> In this change I replaced an open coded padzero that did not clear
> all of the way to the end of the page, with padzero that does.
Yeah, the resulting code is way more readable now.
> I also stopped checking the return of padzero as there is at least
> one known case where testing for failure is the wrong thing to do.
> It looks like binfmt_elf_fdpic may have the proper set of tests
> for when error handling can be safely completed.
>
> I found a couple of commits in the old history
> https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git,
> that look very interesting in understanding this code.
>
> commit 39b56d902bf3 ("[PATCH] binfmt_elf: clearing bss may fail")
> commit c6e2227e4a3e ("[SPARC64]: Missing user access return value checks in fs/binfmt_elf.c and fs/compat.c")
> commit 5bf3be033f50 ("v2.4.10.1 -> v2.4.10.2")
>
> Looking at commit 39b56d902bf3 ("[PATCH] binfmt_elf: clearing bss may fail"):
> > commit 39b56d902bf35241e7cba6cc30b828ed937175ad
> > Author: Pavel Machek <[email protected]>
> > Date: Wed Feb 9 22:40:30 2005 -0800
> >
> > [PATCH] binfmt_elf: clearing bss may fail
> >
> > So we discover that Borland's Kylix application builder emits weird elf
> > files which describe a non-writeable bss segment.
> >
> > So remove the clear_user() check at the place where we zero out the bss. I
> > don't _think_ there are any security implications here (plus we've never
> > checked that clear_user() return value, so whoops if it is a problem).
> >
> > Signed-off-by: Pavel Machek <[email protected]>
> > Signed-off-by: Andrew Morton <[email protected]>
> > Signed-off-by: Linus Torvalds <[email protected]>
>
> It seems pretty clear that binfmt_elf_fdpic with skipping clear_user
> for non-writable segments and otherwise calling clear_user (aka padzero)
> and checking it's return code is the right thing to do.
>
> I just skipped the error checking as that avoids breaking things.
>
> It looks like Borland's Kylix died in 2005 so it might be safe to
> just consider read-only segments with memsz > filesz an error.
I really feel like having a read-only BSS is a pathological state that
should be detected early?
> Looking at commit 5bf3be033f50 ("v2.4.10.1 -> v2.4.10.2") the
> binfmt_elf.c bits confirm my guess that the weird structure is because
> before that point binfmt_elf.c assumed there would be only a single
> segment with memsz > filesz. Which is why the code was structured so
> weirdly.
Agreed.
> Looking a little farther it looks like the binfmt_elf.c was introduced
> in Linux v1.0, with essentially the same structure in load_elf_binary as
> it has now. Prior to that Linux hard coded support for a.out binaries
> in execve. So if someone wants to add a Fixes tag it should be
> "Fixes: v1.0"
>
> Which finally explains to me why the code is so odd. For the most part
> the code has only received maintenance for the last 30 years or so.
> Strictly 29 years, but 30 has a better ring to it.
>
> Anyway those are my rambling thoughts that might help someone.
> For now I will be happy if we can get my elf_load helper tested
> to everyone's satisfaction and merged.
I'm probably going to pull most of this email into the commit log for
the v2 patch -- there's good history here worth capturing.
--
Kees Cook