Compressed kernel has its own drawback: decompressing takes time. Even
though the time is short enough to ignore for most cases but for cases when
time is critical decompressing time still matters.
The patch adds a 'CONFIG_KERNEL_RAW' configure choice so the built binary
can have no decompressing at all. The experiment shows:
kernel kernel size time in decompress_kernel
compressed (gzip) 3.3M 53ms
compressed (lz4) 4.5M 16ms
uncompressed 14M 2ms
Signed-off-by: Chao Peng <[email protected]>
---
v2:
* add HAVE_KERNEL_RAW
* decode ELF kernel in place instead of getting another copy
* minor comment fix
---
arch/x86/Kconfig | 1 +
arch/x86/boot/compressed/Makefile | 3 +++
arch/x86/boot/compressed/misc.c | 18 +++++++++++++-----
init/Kconfig | 13 ++++++++++++-
scripts/Makefile.lib | 8 ++++++++
5 files changed, 37 insertions(+), 6 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cc98d5a..207695c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -142,6 +142,7 @@ config X86
select HAVE_KERNEL_LZ4
select HAVE_KERNEL_LZMA
select HAVE_KERNEL_LZO
+ select HAVE_KERNEL_RAW
select HAVE_KERNEL_XZ
select HAVE_KPROBES
select HAVE_KPROBES_ON_FTRACE
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 44163e8..ed366e1 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -120,6 +120,8 @@ $(obj)/vmlinux.relocs: vmlinux FORCE
vmlinux.bin.all-y := $(obj)/vmlinux.bin
vmlinux.bin.all-$(CONFIG_X86_NEED_RELOCS) += $(obj)/vmlinux.relocs
+$(obj)/vmlinux.bin.raw: $(vmlinux.bin.all-y) FORCE
+ $(call if_changed,raw)
$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE
$(call if_changed,gzip)
$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE
@@ -133,6 +135,7 @@ $(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE
$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE
$(call if_changed,lz4)
+suffix-$(CONFIG_KERNEL_RAW) := raw
suffix-$(CONFIG_KERNEL_GZIP) := gz
suffix-$(CONFIG_KERNEL_BZIP2) := bz2
suffix-$(CONFIG_KERNEL_LZMA) := lzma
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index b3c5a5f0..9791ca9 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -51,6 +51,10 @@
static int vidport;
static int lines, cols;
+#ifdef CONFIG_KERNEL_RAW
+#include <linux/decompress/mm.h>
+#endif
+
#ifdef CONFIG_KERNEL_GZIP
#include "../../../../lib/decompress_inflate.c"
#endif
@@ -265,7 +269,7 @@ static inline void handle_relocations(void *output, unsigned long output_len,
{ }
#endif
-static void parse_elf(void *output)
+static void parse_elf(void* buf, void *output)
{
#ifdef CONFIG_X86_64
Elf64_Ehdr ehdr;
@@ -277,7 +281,7 @@ static void parse_elf(void *output)
void *dest;
int i;
- memcpy(&ehdr, output, sizeof(ehdr));
+ memcpy(&ehdr, buf, sizeof(ehdr));
if (ehdr.e_ident[EI_MAG0] != ELFMAG0 ||
ehdr.e_ident[EI_MAG1] != ELFMAG1 ||
ehdr.e_ident[EI_MAG2] != ELFMAG2 ||
@@ -292,7 +296,7 @@ static void parse_elf(void *output)
if (!phdrs)
error("Failed to allocate space for phdrs");
- memcpy(phdrs, output + ehdr.e_phoff, sizeof(*phdrs) * ehdr.e_phnum);
+ memcpy(phdrs, buf + ehdr.e_phoff, sizeof(*phdrs) * ehdr.e_phnum);
for (i = 0; i < ehdr.e_phnum; i++) {
phdr = &phdrs[i];
@@ -305,7 +309,7 @@ static void parse_elf(void *output)
#else
dest = (void *)(phdr->p_paddr);
#endif
- memmove(dest, output + phdr->p_offset, phdr->p_filesz);
+ memmove(dest, buf + phdr->p_offset, phdr->p_filesz);
break;
default: /* Ignore other PT_* */ break;
}
@@ -401,10 +405,14 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
error("Destination virtual address changed when not relocatable");
#endif
+#ifdef CONFIG_KERNEL_RAW
+ parse_elf(input_data, output);
+#else
debug_putstr("\nDecompressing Linux... ");
__decompress(input_data, input_len, NULL, NULL, output, output_len,
NULL, error);
- parse_elf(output);
+ parse_elf(output, output);
+#endif
handle_relocations(output, output_len, virt_addr);
debug_putstr("done.\nBooting the kernel.\n");
return output;
diff --git a/init/Kconfig b/init/Kconfig
index a92f27d..b8926bb 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -109,6 +109,9 @@ config LOCALVERSION_AUTO
which is done within the script "scripts/setlocalversion".)
+config HAVE_KERNEL_RAW
+ bool
+
config HAVE_KERNEL_GZIP
bool
@@ -130,7 +133,7 @@ config HAVE_KERNEL_LZ4
choice
prompt "Kernel compression mode"
default KERNEL_GZIP
- depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO || HAVE_KERNEL_LZ4
+ depends on HAVE_KERNEL_RAW || HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO || HAVE_KERNEL_LZ4
help
The linux kernel is a kind of self-extracting executable.
Several compression algorithms are available, which differ
@@ -149,6 +152,14 @@ choice
If in doubt, select 'gzip'
+config KERNEL_RAW
+ bool "RAW"
+ depends on HAVE_KERNEL_RAW
+ help
+ No compression. It creates much bigger kernel and uses much more
+ space (disk/memory) than other choices. It can be useful when
+ decompression speed is the most concern while space matters less.
+
config KERNEL_GZIP
bool "Gzip"
depends on HAVE_KERNEL_GZIP
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 0a07f90..8a8f9a9 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -357,6 +357,14 @@ cmd_lz4 = (cat $(filter-out FORCE,$^) | \
lz4c -l -c1 stdin stdout && $(call size_append, $(filter-out FORCE,$^))) > $@ || \
(rm -f $@ ; false)
+# RAW
+# ---------------------------------------------------------------------------
+quiet_cmd_raw = RAW $@
+cmd_raw = (cat $(filter-out FORCE,$^) && \
+ $(call size_append, $(filter-out FORCE,$^))) > $@ || \
+ (rm -f $@ ; false)
+
+
# U-Boot mkimage
# ---------------------------------------------------------------------------
--
1.8.3.1
On Tue, Apr 4, 2017 at 1:52 AM, Chao Peng <[email protected]> wrote:
> Compressed kernel has its own drawback: decompressing takes time. Even
> though the time is short enough to ignore for most cases but for cases when
> time is critical decompressing time still matters.
>
> The patch adds a 'CONFIG_KERNEL_RAW' configure choice so the built binary
> can have no decompressing at all. The experiment shows:
>
> kernel kernel size time in decompress_kernel
> compressed (gzip) 3.3M 53ms
> compressed (lz4) 4.5M 16ms
> uncompressed 14M 2ms
>
> Signed-off-by: Chao Peng <[email protected]>
> ---
> v2:
> * add HAVE_KERNEL_RAW
> * decode ELF kernel in place instead of getting another copy
> * minor comment fix
> ---
> arch/x86/Kconfig | 1 +
> arch/x86/boot/compressed/Makefile | 3 +++
> arch/x86/boot/compressed/misc.c | 18 +++++++++++++-----
> init/Kconfig | 13 ++++++++++++-
> scripts/Makefile.lib | 8 ++++++++
> 5 files changed, 37 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index cc98d5a..207695c 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -142,6 +142,7 @@ config X86
> select HAVE_KERNEL_LZ4
> select HAVE_KERNEL_LZMA
> select HAVE_KERNEL_LZO
> + select HAVE_KERNEL_RAW
> select HAVE_KERNEL_XZ
> select HAVE_KPROBES
> select HAVE_KPROBES_ON_FTRACE
> diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
> index 44163e8..ed366e1 100644
> --- a/arch/x86/boot/compressed/Makefile
> +++ b/arch/x86/boot/compressed/Makefile
> @@ -120,6 +120,8 @@ $(obj)/vmlinux.relocs: vmlinux FORCE
> vmlinux.bin.all-y := $(obj)/vmlinux.bin
> vmlinux.bin.all-$(CONFIG_X86_NEED_RELOCS) += $(obj)/vmlinux.relocs
>
> +$(obj)/vmlinux.bin.raw: $(vmlinux.bin.all-y) FORCE
> + $(call if_changed,raw)
> $(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE
> $(call if_changed,gzip)
> $(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE
> @@ -133,6 +135,7 @@ $(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE
> $(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE
> $(call if_changed,lz4)
>
> +suffix-$(CONFIG_KERNEL_RAW) := raw
> suffix-$(CONFIG_KERNEL_GZIP) := gz
> suffix-$(CONFIG_KERNEL_BZIP2) := bz2
> suffix-$(CONFIG_KERNEL_LZMA) := lzma
> diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
> index b3c5a5f0..9791ca9 100644
> --- a/arch/x86/boot/compressed/misc.c
> +++ b/arch/x86/boot/compressed/misc.c
> @@ -51,6 +51,10 @@
> static int vidport;
> static int lines, cols;
>
> +#ifdef CONFIG_KERNEL_RAW
> +#include <linux/decompress/mm.h>
> +#endif
> +
> #ifdef CONFIG_KERNEL_GZIP
> #include "../../../../lib/decompress_inflate.c"
> #endif
> @@ -265,7 +269,7 @@ static inline void handle_relocations(void *output, unsigned long output_len,
> { }
> #endif
>
> -static void parse_elf(void *output)
> +static void parse_elf(void* buf, void *output)
I think "buf" is too generic a name here. "input" would be better, IMO.
> {
> #ifdef CONFIG_X86_64
> Elf64_Ehdr ehdr;
> @@ -277,7 +281,7 @@ static void parse_elf(void *output)
> void *dest;
> int i;
>
> - memcpy(&ehdr, output, sizeof(ehdr));
> + memcpy(&ehdr, buf, sizeof(ehdr));
> if (ehdr.e_ident[EI_MAG0] != ELFMAG0 ||
> ehdr.e_ident[EI_MAG1] != ELFMAG1 ||
> ehdr.e_ident[EI_MAG2] != ELFMAG2 ||
> @@ -292,7 +296,7 @@ static void parse_elf(void *output)
> if (!phdrs)
> error("Failed to allocate space for phdrs");
>
> - memcpy(phdrs, output + ehdr.e_phoff, sizeof(*phdrs) * ehdr.e_phnum);
> + memcpy(phdrs, buf + ehdr.e_phoff, sizeof(*phdrs) * ehdr.e_phnum);
>
> for (i = 0; i < ehdr.e_phnum; i++) {
> phdr = &phdrs[i];
> @@ -305,7 +309,7 @@ static void parse_elf(void *output)
> #else
> dest = (void *)(phdr->p_paddr);
> #endif
> - memmove(dest, output + phdr->p_offset, phdr->p_filesz);
> + memmove(dest, buf + phdr->p_offset, phdr->p_filesz);
With the renaming from "buf" to "input" this becomes much more
comprehensible: the PT_LOAD segments from "input" are loaded into
"output". However, does this mean that the RAW load uses parse_elf to
move the kernel into place? Does this happen safely? If it's already
safe, shouldn't we not need "input" at all, and leave this as-is, like
how the decompressed kernel operate?
> break;
> default: /* Ignore other PT_* */ break;
> }
> @@ -401,10 +405,14 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
> error("Destination virtual address changed when not relocatable");
> #endif
>
> +#ifdef CONFIG_KERNEL_RAW
> + parse_elf(input_data, output);
> +#else
> debug_putstr("\nDecompressing Linux... ");
> __decompress(input_data, input_len, NULL, NULL, output, output_len,
> NULL, error);
> - parse_elf(output);
> + parse_elf(output, output);
> +#endif
We should continue to avoid #ifdef blocks like this when possible. I'd
recommended C-style:
if (!IS_ENABLED(CONFIG_KERNEL_RAW)) {
debug_putstr("\nDecompressing Linux... ");
__decompress(input_data, input_len, NULL, NULL, output, output_len,
NULL, error);
} else
output = input_data;
parse_elf(output);
> handle_relocations(output, output_len, virt_addr);
> debug_putstr("done.\nBooting the kernel.\n");
> return output;
> diff --git a/init/Kconfig b/init/Kconfig
> index a92f27d..b8926bb 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -109,6 +109,9 @@ config LOCALVERSION_AUTO
>
> which is done within the script "scripts/setlocalversion".)
>
> +config HAVE_KERNEL_RAW
> + bool
> +
> config HAVE_KERNEL_GZIP
> bool
>
> @@ -130,7 +133,7 @@ config HAVE_KERNEL_LZ4
> choice
> prompt "Kernel compression mode"
> default KERNEL_GZIP
> - depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO || HAVE_KERNEL_LZ4
> + depends on HAVE_KERNEL_RAW || HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO || HAVE_KERNEL_LZ4
> help
> The linux kernel is a kind of self-extracting executable.
> Several compression algorithms are available, which differ
> @@ -149,6 +152,14 @@ choice
>
> If in doubt, select 'gzip'
>
> +config KERNEL_RAW
> + bool "RAW"
> + depends on HAVE_KERNEL_RAW
> + help
> + No compression. It creates much bigger kernel and uses much more
> + space (disk/memory) than other choices. It can be useful when
> + decompression speed is the most concern while space matters less.
> +
> config KERNEL_GZIP
> bool "Gzip"
> depends on HAVE_KERNEL_GZIP
> diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
> index 0a07f90..8a8f9a9 100644
> --- a/scripts/Makefile.lib
> +++ b/scripts/Makefile.lib
> @@ -357,6 +357,14 @@ cmd_lz4 = (cat $(filter-out FORCE,$^) | \
> lz4c -l -c1 stdin stdout && $(call size_append, $(filter-out FORCE,$^))) > $@ || \
> (rm -f $@ ; false)
>
> +# RAW
> +# ---------------------------------------------------------------------------
> +quiet_cmd_raw = RAW $@
> +cmd_raw = (cat $(filter-out FORCE,$^) && \
> + $(call size_append, $(filter-out FORCE,$^))) > $@ || \
> + (rm -f $@ ; false)
> +
> +
> # U-Boot mkimage
> # ---------------------------------------------------------------------------
>
> --
> 1.8.3.1
>
-Kees
--
Kees Cook
Pixel Security
On Tue, Apr 4, 2017 at 12:14 PM, Kees Cook <[email protected]> wrote:
> On Tue, Apr 4, 2017 at 1:52 AM, Chao Peng <[email protected]> wrote:
>> - memmove(dest, output + phdr->p_offset, phdr->p_filesz);
>> + memmove(dest, buf + phdr->p_offset, phdr->p_filesz);
>
> With the renaming from "buf" to "input" this becomes much more
> comprehensible: the PT_LOAD segments from "input" are loaded into
> "output". However, does this mean that the RAW load uses parse_elf to
> move the kernel into place? Does this happen safely? If it's already
> safe, shouldn't we not need "input" at all, and leave this as-is, like
> how the decompressed kernel operate?
This is a bit of a brain dump of what I learned when I played with
this code a couple weeks ago:
parse_elf() is incredibly fragile. It seems to work correct in two cases:
1. We're randomizing. As far as I can tell, it just works.
2. We get lucky and all the memmoves are backwards. The first segment
overwrites the headers, the second segment may overwrite the first,
etc. This also requires that the (missing, ahem) memsets to zero out
the ends of segments don't overwrite anything. Of course, nothing
accounts for them now and adding the missing memset breaks the boot.
#2 is very fragile, needless to say.
I'd love to see the code that sets up the decompressor figure out much
wiggle room is actually needed and allocate accordingly.