2009-10-19 14:41:28

by Ryan C. Gordon

[permalink] [raw]
Subject: [RFC][PATCH 2/2] binfmt_elf: FatELF support for kernel modules.


Allows kernel modules to be FatELF binaries.

Details, rationale, tools, and patches for handling FatELF binaries can be
found at http://icculus.org/fatelf/

Please note that this requires an updated depmod and modprobe to be truly
effective, but an unmodified insmod can work with FatELF binaries.

Signed-off-by: Ryan C. Gordon <[email protected]>
---
kernel/module.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++----
1 files changed, 67 insertions(+), 5 deletions(-)

diff --git a/kernel/module.c b/kernel/module.c
index 8b7d880..cda8f79 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2066,13 +2066,69 @@ static inline void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr,
}
#endif

+/*
+ * See if we're a valid FatELF binary, find the right record, and
+ * return the offset of that record within the binary. Returns NULL if there's
+ * a problem, or a pointer to the real ELF header if we're okay.
+ * If we don't see the FatELF magic number, we assume this is a regular ELF
+ * binary and let the regular ELF checks handle it.
+ *
+ * This is a simplified version of examine_fatelf in fs/binfmt_elf.c
+ */
+static Elf_Ehdr *examine_fatelf_module(const unsigned char *hdr,
+ const unsigned long len)
+{
+ Elf_Ehdr elf;
+ int records, i;
+ const fatelf_hdr *fatelf = (const fatelf_hdr *) hdr;
+
+ if (likely(le32_to_cpu(fatelf->magic) != FATELF_MAGIC)) {
+ return (Elf_Ehdr *) hdr; /* not FatELF; not an error. */
+ } else if (unlikely(le16_to_cpu(fatelf->version) != 1)) {
+ return NULL; /* Unrecognized format version. */
+ }
+
+ memset(&elf, 0, sizeof (elf));
+
+ records = (int) fatelf->num_records; /* uint8, no byteswap needed */
+ for (i = 0; i < records; i++) {
+ const fatelf_record *record = &fatelf->records[i];
+
+ /* Fill in the data elf_check_arch() might care about. */
+ elf.e_ident[EI_OSABI] = record->osabi;
+ elf.e_ident[EI_CLASS] = record->word_size;
+ elf.e_ident[EI_DATA] = record->byte_order;
+ elf.e_machine = le16_to_cpu(record->machine);
+
+ if (likely(!elf_check_arch(&elf))) {
+ continue; /* Unsupported CPU architecture. */
+ } else {
+ const __u64 rec_offset = le64_to_cpu(record->offset);
+ const __u64 rec_size = le64_to_cpu(record->size);
+ const __u64 end_offset = rec_offset + rec_size;
+ const unsigned long uloff = (unsigned long) rec_offset;
+
+ if (unlikely(end_offset < rec_offset)) {
+ continue; /* overflow (corrupt file?)... */
+ } else if (unlikely(end_offset > len)) {
+ continue; /* past EOF. */
+ }
+
+ return (Elf_Ehdr *) (hdr + uloff);
+ }
+ }
+
+ return NULL; /* no binaries we could use. */
+}
+
/* Allocate and load the module: note that size of section 0 is always
zero, and we rely on this for optional sections. */
static noinline struct module *load_module(void __user *umod,
unsigned long len,
const char __user *uargs)
{
- Elf_Ehdr *hdr;
+ Elf_Ehdr *hdr_alloc; /* returned from vmalloc */
+ Elf_Ehdr *hdr; /* adjusted hdr_alloc for FatELF */
Elf_Shdr *sechdrs;
char *secstrings, *args, *modmagic, *strtab = NULL;
char *staging;
@@ -2094,14 +2150,20 @@ static noinline struct module *load_module(void __user *umod,

/* Suck in entire file: we'll want most of it. */
/* vmalloc barfs on "unusual" numbers. Check here */
- if (len > 64 * 1024 * 1024 || (hdr = vmalloc(len)) == NULL)
+ if (len > 64 * 1024 * 1024 || (hdr_alloc = vmalloc(len)) == NULL)
return ERR_PTR(-ENOMEM);

- if (copy_from_user(hdr, umod, len) != 0) {
+ if (copy_from_user(hdr_alloc, umod, len) != 0) {
err = -EFAULT;
goto free_hdr;
}

+ hdr = examine_fatelf_module((unsigned char *) hdr_alloc, len);
+ if (hdr == NULL) {
+ err = -ENOEXEC;
+ goto free_hdr;
+ }
+
/* Sanity checks against insmoding binaries or wrong arch,
weird elf version */
if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0
@@ -2505,7 +2567,7 @@ static noinline struct module *load_module(void __user *umod,
add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs);

/* Get rid of temporary copy */
- vfree(hdr);
+ vfree(hdr_alloc);

trace_module_load(mod);

@@ -2538,7 +2600,7 @@ static noinline struct module *load_module(void __user *umod,
kfree(args);
kfree(strmap);
free_hdr:
- vfree(hdr);
+ vfree(hdr_alloc);
return ERR_PTR(err);

truncated:
--
1.6.0.4


2009-10-20 00:12:53

by Jeremy Fitzhardinge

[permalink] [raw]
Subject: Re: [RFC][PATCH 2/2] binfmt_elf: FatELF support for kernel modules.

On 10/19/09 23:41, Ryan C. Gordon wrote:
> Allows kernel modules to be FatELF binaries.
>
> Details, rationale, tools, and patches for handling FatELF binaries can be
> found at http://icculus.org/fatelf/
>
> Please note that this requires an updated depmod and modprobe to be truly
> effective, but an unmodified insmod can work with FatELF binaries.
>

This seems much more dubious. It would only encourage more binary
modules, which we're not very keen on doing.

J

> Signed-off-by: Ryan C. Gordon <[email protected]>
> ---
> kernel/module.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++----
> 1 files changed, 67 insertions(+), 5 deletions(-)
>
> diff --git a/kernel/module.c b/kernel/module.c
> index 8b7d880..cda8f79 100644
> --- a/kernel/module.c
> +++ b/kernel/module.c
> @@ -2066,13 +2066,69 @@ static inline void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr,
> }
> #endif
>
> +/*
> + * See if we're a valid FatELF binary, find the right record, and
> + * return the offset of that record within the binary. Returns NULL if there's
> + * a problem, or a pointer to the real ELF header if we're okay.
> + * If we don't see the FatELF magic number, we assume this is a regular ELF
> + * binary and let the regular ELF checks handle it.
> + *
> + * This is a simplified version of examine_fatelf in fs/binfmt_elf.c
> + */
> +static Elf_Ehdr *examine_fatelf_module(const unsigned char *hdr,
> + const unsigned long len)
> +{
> + Elf_Ehdr elf;
> + int records, i;
> + const fatelf_hdr *fatelf = (const fatelf_hdr *) hdr;
> +
> + if (likely(le32_to_cpu(fatelf->magic) != FATELF_MAGIC)) {
> + return (Elf_Ehdr *) hdr; /* not FatELF; not an error. */
> + } else if (unlikely(le16_to_cpu(fatelf->version) != 1)) {
> + return NULL; /* Unrecognized format version. */
> + }
> +
> + memset(&elf, 0, sizeof (elf));
> +
> + records = (int) fatelf->num_records; /* uint8, no byteswap needed */
> + for (i = 0; i < records; i++) {
> + const fatelf_record *record = &fatelf->records[i];
> +
> + /* Fill in the data elf_check_arch() might care about. */
> + elf.e_ident[EI_OSABI] = record->osabi;
> + elf.e_ident[EI_CLASS] = record->word_size;
> + elf.e_ident[EI_DATA] = record->byte_order;
> + elf.e_machine = le16_to_cpu(record->machine);
> +
> + if (likely(!elf_check_arch(&elf))) {
> + continue; /* Unsupported CPU architecture. */
> + } else {
> + const __u64 rec_offset = le64_to_cpu(record->offset);
> + const __u64 rec_size = le64_to_cpu(record->size);
> + const __u64 end_offset = rec_offset + rec_size;
> + const unsigned long uloff = (unsigned long) rec_offset;
> +
> + if (unlikely(end_offset < rec_offset)) {
> + continue; /* overflow (corrupt file?)... */
> + } else if (unlikely(end_offset > len)) {
> + continue; /* past EOF. */
> + }
> +
> + return (Elf_Ehdr *) (hdr + uloff);
> + }
> + }
> +
> + return NULL; /* no binaries we could use. */
> +}
> +
> /* Allocate and load the module: note that size of section 0 is always
> zero, and we rely on this for optional sections. */
> static noinline struct module *load_module(void __user *umod,
> unsigned long len,
> const char __user *uargs)
> {
> - Elf_Ehdr *hdr;
> + Elf_Ehdr *hdr_alloc; /* returned from vmalloc */
> + Elf_Ehdr *hdr; /* adjusted hdr_alloc for FatELF */
> Elf_Shdr *sechdrs;
> char *secstrings, *args, *modmagic, *strtab = NULL;
> char *staging;
> @@ -2094,14 +2150,20 @@ static noinline struct module *load_module(void __user *umod,
>
> /* Suck in entire file: we'll want most of it. */
> /* vmalloc barfs on "unusual" numbers. Check here */
> - if (len > 64 * 1024 * 1024 || (hdr = vmalloc(len)) == NULL)
> + if (len > 64 * 1024 * 1024 || (hdr_alloc = vmalloc(len)) == NULL)
> return ERR_PTR(-ENOMEM);
>
> - if (copy_from_user(hdr, umod, len) != 0) {
> + if (copy_from_user(hdr_alloc, umod, len) != 0) {
> err = -EFAULT;
> goto free_hdr;
> }
>
> + hdr = examine_fatelf_module((unsigned char *) hdr_alloc, len);
> + if (hdr == NULL) {
> + err = -ENOEXEC;
> + goto free_hdr;
> + }
> +
> /* Sanity checks against insmoding binaries or wrong arch,
> weird elf version */
> if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0
> @@ -2505,7 +2567,7 @@ static noinline struct module *load_module(void __user *umod,
> add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
>
> /* Get rid of temporary copy */
> - vfree(hdr);
> + vfree(hdr_alloc);
>
> trace_module_load(mod);
>
> @@ -2538,7 +2600,7 @@ static noinline struct module *load_module(void __user *umod,
> kfree(args);
> kfree(strmap);
> free_hdr:
> - vfree(hdr);
> + vfree(hdr_alloc);
> return ERR_PTR(err);
>
> truncated:
>

2009-10-20 04:54:46

by Ryan C. Gordon

[permalink] [raw]
Subject: Re: [RFC][PATCH 2/2] binfmt_elf: FatELF support for kernel modules.


> > Allows kernel modules to be FatELF binaries.
>
> This seems much more dubious. It would only encourage more binary
> modules, which we're not very keen on doing.

I can understand that concern, but I worry about refusing to take steps
that would aid free software developers in case it might help the
closed-source people, too.

Those that will behave badly will do so regardless of file formats, but
distros shipping nothing but GPL'd software and in-tree drivers would
benefit from this more than another misguided company that probably
doesn't care about multiple CPU architectures anyhow.

--ryan.

2009-10-20 08:33:49

by Cong Wang

[permalink] [raw]
Subject: Re: [RFC][PATCH 2/2] binfmt_elf: FatELF support for kernel modules.

On Mon, Oct 19, 2009 at 10:41 PM, Ryan C. Gordon <[email protected]> wrote:
>
> Allows kernel modules to be FatELF binaries.
>
> Details, rationale, tools, and patches for handling FatELF binaries can be
> found at http://icculus.org/fatelf/
>
> Please note that this requires an updated depmod and modprobe to be truly
> effective, but an unmodified insmod can work with FatELF binaries.
>
> Signed-off-by: Ryan C. Gordon <[email protected]>
> ---
>  kernel/module.c |   72 +++++++++++++++++++++++++++++++++++++++++++++++++++----
>  1 files changed, 67 insertions(+), 5 deletions(-)
>
> diff --git a/kernel/module.c b/kernel/module.c
> index 8b7d880..cda8f79 100644
> --- a/kernel/module.c
> +++ b/kernel/module.c
> @@ -2066,13 +2066,69 @@ static inline void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr,
>  }
>  #endif
>
> +/*
> + * See if we're a valid FatELF binary, find the right record, and
> + *  return the offset of that record within the binary. Returns NULL if there's
> + *  a problem, or a pointer to the real ELF header if we're okay.
> + *  If we don't see the FatELF magic number, we assume this is a regular ELF
> + *  binary and let the regular ELF checks handle it.
> + *
> + * This is a simplified version of examine_fatelf in fs/binfmt_elf.c
> + */
> +static Elf_Ehdr *examine_fatelf_module(const unsigned char *hdr,
> +                                      const unsigned long len)
> +{
> +       Elf_Ehdr elf;
> +       int records, i;
> +       const fatelf_hdr *fatelf = (const fatelf_hdr *) hdr;


As for 'Elf_Ehdr', isn't 'Fatelf_hdr' better? :)

> +
> +       if (likely(le32_to_cpu(fatelf->magic) != FATELF_MAGIC)) {
> +               return (Elf_Ehdr *) hdr;  /* not FatELF; not an error. */
> +       } else if (unlikely(le16_to_cpu(fatelf->version) != 1)) {
> +               return NULL; /* Unrecognized format version. */
> +       }
> +


These braces are unnecessary.

> +       memset(&elf, 0, sizeof (elf));
> +
> +       records = (int) fatelf->num_records;  /* uint8, no byteswap needed */
> +       for (i = 0; i < records; i++) {
> +               const fatelf_record *record = &fatelf->records[i];
> +
> +               /* Fill in the data elf_check_arch() might care about. */
> +               elf.e_ident[EI_OSABI] = record->osabi;
> +               elf.e_ident[EI_CLASS] = record->word_size;
> +               elf.e_ident[EI_DATA] = record->byte_order;
> +               elf.e_machine = le16_to_cpu(record->machine);
> +
> +               if (likely(!elf_check_arch(&elf))) {
> +                       continue;  /* Unsupported CPU architecture. */

This 'continue' can be removed.


> +               } else {
> +                       const __u64 rec_offset = le64_to_cpu(record->offset);
> +                       const __u64 rec_size = le64_to_cpu(record->size);
> +                       const __u64 end_offset = rec_offset + rec_size;
> +                       const unsigned long uloff = (unsigned long) rec_offset;
> +
> +                       if (unlikely(end_offset < rec_offset)) {
> +                               continue;  /* overflow (corrupt file?)... */


ditto

> +                       } else if (unlikely(end_offset > len)) {
> +                               continue;  /* past EOF. */

ditto

> +                       }
> +
> +                       return (Elf_Ehdr *) (hdr + uloff);
> +               }
> +       }
> +
> +       return NULL;  /* no binaries we could use. */
> +}
> +
>  /* Allocate and load the module: note that size of section 0 is always
>    zero, and we rely on this for optional sections. */
>  static noinline struct module *load_module(void __user *umod,
>                                  unsigned long len,
>                                  const char __user *uargs)
>  {
> -       Elf_Ehdr *hdr;
> +       Elf_Ehdr *hdr_alloc;  /* returned from vmalloc */
> +       Elf_Ehdr *hdr;  /* adjusted hdr_alloc for FatELF */
>        Elf_Shdr *sechdrs;
>        char *secstrings, *args, *modmagic, *strtab = NULL;
>        char *staging;
> @@ -2094,14 +2150,20 @@ static noinline struct module *load_module(void __user *umod,
>
>        /* Suck in entire file: we'll want most of it. */
>        /* vmalloc barfs on "unusual" numbers.  Check here */
> -       if (len > 64 * 1024 * 1024 || (hdr = vmalloc(len)) == NULL)
> +       if (len > 64 * 1024 * 1024 || (hdr_alloc = vmalloc(len)) == NULL)
>                return ERR_PTR(-ENOMEM);
>
> -       if (copy_from_user(hdr, umod, len) != 0) {
> +       if (copy_from_user(hdr_alloc, umod, len) != 0) {
>                err = -EFAULT;
>                goto free_hdr;
>        }
>
> +       hdr = examine_fatelf_module((unsigned char *) hdr_alloc, len);
> +       if (hdr == NULL) {
> +               err = -ENOEXEC;
> +               goto free_hdr;
> +       }
> +
>        /* Sanity checks against insmoding binaries or wrong arch,
>            weird elf version */
>        if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0
> @@ -2505,7 +2567,7 @@ static noinline struct module *load_module(void __user *umod,
>        add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
>
>        /* Get rid of temporary copy */
> -       vfree(hdr);
> +       vfree(hdr_alloc);
>
>        trace_module_load(mod);
>
> @@ -2538,7 +2600,7 @@ static noinline struct module *load_module(void __user *umod,
>        kfree(args);
>        kfree(strmap);
>  free_hdr:
> -       vfree(hdr);
> +       vfree(hdr_alloc);
>        return ERR_PTR(err);
>
>  truncated:
> --
> 1.6.0.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>

2009-10-21 08:06:51

by Ryan C. Gordon

[permalink] [raw]
Subject: Re: [RFC][PATCH 2/2] binfmt_elf: FatELF support for kernel modules.


> As for 'Elf_Ehdr', isn't 'Fatelf_hdr' better? :)

Yeah, I struggled with that for awhile...the structs around it in elf.h
were all lowercase (elf32_hdr), but I moved it somewhere more appropriate
and fixed the case.

These were all good suggestions, so here's an updated version of the patch
with the changes you noted. Thank you for taking the time to review my
work!

--ryan.



>From 38cbb068c3970e9981549578c20207458a2b6659 Mon Sep 17 00:00:00 2001
From: Ryan C. Gordon <[email protected]>
Date: Tue, 20 Oct 2009 20:36:05 -0400
Subject: [PATCH] binfmt_elf: FatELF support for kernel modules.

Allows kernel modules to be FatELF binaries.

Details, rationale, tools, and patches for handling FatELF binaries can be
found at http://icculus.org/fatelf/

Please note that this requires an updated depmod and modprobe to be truly
effective, but an unmodified insmod can work with FatELF binaries.

Signed-off-by: Ryan C. Gordon <[email protected]>
---
kernel/module.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++----
1 files changed, 60 insertions(+), 5 deletions(-)

diff --git a/kernel/module.c b/kernel/module.c
index 8b7d880..e8b2e8f 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2066,13 +2066,62 @@ static inline void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr,
}
#endif

+/*
+ * See if we're a valid FatELF binary, find the right record, and
+ * return the offset of that record within the binary. Returns NULL if there's
+ * a problem, or a pointer to the real ELF header if we're okay.
+ * If we don't see the FatELF magic number, we assume this is a regular ELF
+ * binary and let the regular ELF checks handle it.
+ *
+ * This is a simplified version of examine_fatelf in fs/binfmt_elf.c
+ */
+static Elf_Ehdr *examine_fatelf_module(const unsigned char *hdr,
+ const unsigned long len)
+{
+ Elf_Ehdr elf;
+ int records, i;
+ const Fatelf_hdr *fatelf = (const Fatelf_hdr *) hdr;
+
+ if (likely(le32_to_cpu(fatelf->magic) != FATELF_MAGIC))
+ return (Elf_Ehdr *) hdr; /* not FatELF; not an error. */
+ else if (unlikely(le16_to_cpu(fatelf->version) != 1))
+ return NULL; /* Unrecognized format version. */
+
+ memset(&elf, 0, sizeof (elf));
+
+ records = (int) fatelf->num_records; /* uint8, no byteswap needed */
+ for (i = 0; i < records; i++) {
+ const Fatelf_record *record = &fatelf->records[i];
+
+ /* Fill in the data elf_check_arch() might care about. */
+ elf.e_ident[EI_OSABI] = record->osabi;
+ elf.e_ident[EI_CLASS] = record->word_size;
+ elf.e_ident[EI_DATA] = record->byte_order;
+ elf.e_machine = le16_to_cpu(record->machine);
+
+ if (unlikely(elf_check_arch(&elf))) {
+ const __u64 rec_offset = le64_to_cpu(record->offset);
+ const __u64 rec_size = le64_to_cpu(record->size);
+ const __u64 end_offset = rec_offset + rec_size;
+ const unsigned long uloff = (unsigned long) rec_offset;
+
+ /* check for overflow and past-EOF before choosing this record */
+ if (likely((end_offset >= rec_offset) && (end_offset <= len)))
+ return (Elf_Ehdr *) (hdr + uloff);
+ }
+ }
+
+ return NULL; /* no binaries we could use. */
+}
+
/* Allocate and load the module: note that size of section 0 is always
zero, and we rely on this for optional sections. */
static noinline struct module *load_module(void __user *umod,
unsigned long len,
const char __user *uargs)
{
- Elf_Ehdr *hdr;
+ Elf_Ehdr *hdr_alloc; /* returned from vmalloc */
+ Elf_Ehdr *hdr; /* adjusted hdr_alloc for FatELF */
Elf_Shdr *sechdrs;
char *secstrings, *args, *modmagic, *strtab = NULL;
char *staging;
@@ -2094,14 +2143,20 @@ static noinline struct module *load_module(void __user *umod,

/* Suck in entire file: we'll want most of it. */
/* vmalloc barfs on "unusual" numbers. Check here */
- if (len > 64 * 1024 * 1024 || (hdr = vmalloc(len)) == NULL)
+ if (len > 64 * 1024 * 1024 || (hdr_alloc = vmalloc(len)) == NULL)
return ERR_PTR(-ENOMEM);

- if (copy_from_user(hdr, umod, len) != 0) {
+ if (copy_from_user(hdr_alloc, umod, len) != 0) {
err = -EFAULT;
goto free_hdr;
}

+ hdr = examine_fatelf_module((unsigned char *) hdr_alloc, len);
+ if (hdr == NULL) {
+ err = -ENOEXEC;
+ goto free_hdr;
+ }
+
/* Sanity checks against insmoding binaries or wrong arch,
weird elf version */
if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0
@@ -2505,7 +2560,7 @@ static noinline struct module *load_module(void __user *umod,
add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs);

/* Get rid of temporary copy */
- vfree(hdr);
+ vfree(hdr_alloc);

trace_module_load(mod);

@@ -2538,7 +2593,7 @@ static noinline struct module *load_module(void __user *umod,
kfree(args);
kfree(strmap);
free_hdr:
- vfree(hdr);
+ vfree(hdr_alloc);
return ERR_PTR(err);

truncated:
--
1.6.0.4

2009-10-23 22:24:09

by Jeremy Fitzhardinge

[permalink] [raw]
Subject: Re: [RFC][PATCH 2/2] binfmt_elf: FatELF support for kernel modules.

On 10/19/09 21:54, Ryan C. Gordon wrote:
> I can understand that concern, but I worry about refusing to take steps
> that would aid free software developers in case it might help the
> closed-source people, too.
>

Any open source driver should be encouraged to be merged with mainline
Linux so there's no need to distribute them separately. With the
staging/ tree, that's easier than ever.

I don't see much upside in making it "easier" to distribute binary-only
open source drivers separately. (It wouldn't help that much, in the
end; the modules would still be compiled for some finite set of kernels,
and if the user wants to use something else they're still stuck.)

> Those that will behave badly will do so regardless of file formats, but
> distros shipping nothing but GPL'd software and in-tree drivers would
> benefit from this more than another misguided company that probably
> doesn't care about multiple CPU architectures anyhow.
>

Well, ideally a fat module would allow modules for multiple kernels to
be bundled together (same and/or different architectures), which is
primarily useful for 3rd-party binary distributions.


J

2009-10-24 00:31:14

by Ryan C. Gordon

[permalink] [raw]
Subject: Re: [RFC][PATCH 2/2] binfmt_elf: FatELF support for kernel modules.



On Fri, 23 Oct 2009, Jeremy Fitzhardinge wrote:

> Any open source driver should be encouraged to be merged with mainline
> Linux so there's no need to distribute them separately. With the
> staging/ tree, that's easier than ever.

Yes, absolutely. I agree.

But even for mainline drivers, there's a compelling use-case for this,
even for a Linux distribution that's 100% Free Software.

Those that want to play outside the sandbox still have all the misery of
having to keep up with kernel interface changes and supporting multiple
kernel releases. That's the hard part for them, and I'm happy to continue
to make it uncomfortable in that regard. How the kernel module gets loaded
is probably not of great importance compared to the engineering resources
they probably devote to just keeping up.

> Well, ideally a fat module would allow modules for multiple kernels to
> be bundled together (same and/or different architectures), which is
> primarily useful for 3rd-party binary distributions.

If the OSABI version doesn't change, they can't use FatELF to pack
together multiple drivers for different kernel versions. fatelf-glue
refuses to merge them, and the kernel will pick the first one, and if it
fails to load, tough luck.

I think this should resolve the concern from a technical standpoint. They
still have the same problems.

--ryan.

2009-10-24 11:12:52

by Alan

[permalink] [raw]
Subject: Re: [RFC][PATCH 2/2] binfmt_elf: FatELF support for kernel modules.

> Well, ideally a fat module would allow modules for multiple kernels to
> be bundled together (same and/or different architectures), which is
> primarily useful for 3rd-party binary distributions.

You would need thousands and thousands of binaries to do that. I'm not
sure the gigabyte sized module file would be too popular. It would be
easier to recompile it even automatically (take a look at the Dell stuff
for this)

Alan

2009-10-26 19:14:01

by Jeremy Fitzhardinge

[permalink] [raw]
Subject: Re: [RFC][PATCH 2/2] binfmt_elf: FatELF support for kernel modules.

On 10/24/09 04:14, Alan Cox wrote:
>> Well, ideally a fat module would allow modules for multiple kernels to
>> be bundled together (same and/or different architectures), which is
>> primarily useful for 3rd-party binary distributions.
>>
> You would need thousands and thousands of binaries to do that. I'm not
> sure the gigabyte sized module file would be too popular. It would be
> easier to recompile it even automatically (take a look at the Dell stuff
> for this)
>

Yeah, I should have been a bit clearer here. My point was that if this
facility were to exist, it would seem logical to support that kind of
mode of operation (since at least at one point vendors shipping binary
modules would seem to include a few enterprise distro builds and hope
that would cover it). But that doesn't seem like a very good idea.

Auto-building schemes are better, but they still always seem to break.

J

2009-10-26 19:57:28

by Alan

[permalink] [raw]
Subject: Re: [RFC][PATCH 2/2] binfmt_elf: FatELF support for kernel modules.

> mode of operation (since at least at one point vendors shipping binary
> modules would seem to include a few enterprise distro builds and hope
> that would cover it). But that doesn't seem like a very good idea.

If you want to ship additional modules you can already do that, the
kernel search path is dependant upon the kernel name.

2009-10-30 02:25:47

by Ryan C. Gordon

[permalink] [raw]
Subject: Re: [RFC][PATCH 2/2] binfmt_elf: FatELF support for kernel modules.


(Hopefully-final version, now checkpatch.pl approved!)

>From 16233022abfa6561b9416d374c74b4e42d7632ca Mon Sep 17 00:00:00 2001
From: Ryan C. Gordon <[email protected]>
Date: Tue, 27 Oct 2009 14:00:12 -0400
Subject: [PATCH] binfmt_elf: FatELF support for kernel modules.

Allows kernel modules to be FatELF binaries.

Details, rationale, tools, and patches for handling FatELF binaries can be
found at http://icculus.org/fatelf/

Please note that this requires an updated depmod and modprobe to be truly
effective, but an unmodified insmod can work with FatELF binaries.

Signed-off-by: Ryan C. Gordon <[email protected]>
---
kernel/module.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++----
1 files changed, 66 insertions(+), 5 deletions(-)

diff --git a/kernel/module.c b/kernel/module.c
index 8b7d880..94e0153 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2066,13 +2066,64 @@ static inline void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr,
}
#endif

+/*
+ * See if we're a valid FatELF binary, find the right record, and
+ * return the offset of that record within the binary. Returns NULL if there's
+ * a problem, or a pointer to the real ELF header if we're okay.
+ * If we don't see the FatELF magic number, we assume this is a regular ELF
+ * binary and let the regular ELF checks handle it.
+ *
+ * This is a simplified version of examine_fatelf in fs/binfmt_elf.c
+ */
+static Elf_Ehdr *examine_fatelf_module(const unsigned char *hdr,
+ const unsigned long len)
+{
+ Elf_Ehdr elf;
+ int records, i;
+ const struct Fatelf_hdr *fatelf = (const struct Fatelf_hdr *) hdr;
+
+ if (likely(le32_to_cpu(fatelf->magic) != FATELF_MAGIC))
+ return (Elf_Ehdr *) hdr; /* not FatELF; not an error. */
+ else if (unlikely(le16_to_cpu(fatelf->version) != 1))
+ return NULL; /* Unrecognized format version. */
+
+ memset(&elf, 0, sizeof(elf));
+
+ records = (int) fatelf->num_records; /* uint8, no byteswap needed */
+ for (i = 0; i < records; i++) {
+ const struct Fatelf_record *record = &fatelf->records[i];
+
+ /* Fill in the data elf_check_arch() might care about. */
+ elf.e_ident[EI_OSABI] = record->osabi;
+ elf.e_ident[EI_CLASS] = record->word_size;
+ elf.e_ident[EI_DATA] = record->byte_order;
+ elf.e_machine = le16_to_cpu(record->machine);
+
+ if (unlikely(elf_check_arch(&elf))) {
+ const __u64 rec_offset = le64_to_cpu(record->offset);
+ const __u64 rec_size = le64_to_cpu(record->size);
+ const __u64 end_offset = rec_offset + rec_size;
+ const unsigned long uloff = (unsigned long) rec_offset;
+
+ /* check for overflow and past-EOF in this record */
+ if (likely(likely(end_offset >= rec_offset) &&
+ likely(end_offset <= len))) {
+ return (Elf_Ehdr *) (hdr + uloff);
+ }
+ }
+ }
+
+ return NULL; /* no binaries we could use. */
+}
+
/* Allocate and load the module: note that size of section 0 is always
zero, and we rely on this for optional sections. */
static noinline struct module *load_module(void __user *umod,
unsigned long len,
const char __user *uargs)
{
- Elf_Ehdr *hdr;
+ Elf_Ehdr *hdr_alloc; /* returned from vmalloc */
+ Elf_Ehdr *hdr; /* adjusted hdr_alloc for FatELF */
Elf_Shdr *sechdrs;
char *secstrings, *args, *modmagic, *strtab = NULL;
char *staging;
@@ -2094,14 +2145,24 @@ static noinline struct module *load_module(void __user *umod,

/* Suck in entire file: we'll want most of it. */
/* vmalloc barfs on "unusual" numbers. Check here */
- if (len > 64 * 1024 * 1024 || (hdr = vmalloc(len)) == NULL)
+ if (unlikely(len > 64 * 1024 * 1024))
+ return ERR_PTR(-ENOMEM);
+
+ hdr_alloc = vmalloc(len);
+ if (unlikely(hdr_alloc == NULL))
return ERR_PTR(-ENOMEM);

- if (copy_from_user(hdr, umod, len) != 0) {
+ if (copy_from_user(hdr_alloc, umod, len) != 0) {
err = -EFAULT;
goto free_hdr;
}

+ hdr = examine_fatelf_module((unsigned char *) hdr_alloc, len);
+ if (hdr == NULL) {
+ err = -ENOEXEC;
+ goto free_hdr;
+ }
+
/* Sanity checks against insmoding binaries or wrong arch,
weird elf version */
if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0
@@ -2505,7 +2566,7 @@ static noinline struct module *load_module(void __user *umod,
add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs);

/* Get rid of temporary copy */
- vfree(hdr);
+ vfree(hdr_alloc);

trace_module_load(mod);

@@ -2538,7 +2599,7 @@ static noinline struct module *load_module(void __user *umod,
kfree(args);
kfree(strmap);
free_hdr:
- vfree(hdr);
+ vfree(hdr_alloc);
return ERR_PTR(err);

truncated:
--
1.6.0.4

2009-10-30 10:30:33

by Alan

[permalink] [raw]
Subject: Re: [RFC][PATCH 2/2] binfmt_elf: FatELF support for kernel modules.

> Allows kernel modules to be FatELF binaries.
>
> Details, rationale, tools, and patches for handling FatELF binaries can be
> found at http://icculus.org/fatelf/

And you still ignore the fundamental point that the kernel module search
path typically includes the architecture anyway so you can simply install
two module files. That makes this feature pointless.

NAK

2009-10-30 14:19:55

by Ryan C. Gordon

[permalink] [raw]
Subject: Re: [RFC][PATCH 2/2] binfmt_elf: FatELF support for kernel modules.


> And you still ignore the fundamental point that the kernel module search
> path typically includes the architecture anyway so you can simply install
> two module files. That makes this feature pointless.

The module loading patch is just for the sake of completeness, so if
there's a hesitance to apply this part, I'm okay with that.

--ryan.