2013-04-26 19:04:12

by Kees Cook

Subject: [PATCH v4 0/6] kernel ASLR

This series includes various small cleanups noted in the individual
patches. Further work will include 64-bit separation of physical and
virtual memory locations.

-Kees


2013-04-26 19:04:23

by Kees Cook

Subject: [PATCH 4/6] x86: kaslr: select random base offset

Select a random location when CONFIG_RANDOMIZE_BASE is used, bounded
by CONFIG_RANDOMIZE_BASE_MAX_OFFSET. Sources of randomness currently
include RDRAND and RDTSC.

Signed-off-by: Kees Cook <[email protected]>
---
v2:
- use rdtscl from msr.h, thanks to Mathias Krause.
---
arch/x86/Kconfig | 29 +++++++++++++++--
arch/x86/boot/compressed/aslr.c | 67 +++++++++++++++++++++++++++++++++++++--
2 files changed, 92 insertions(+), 4 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6f59afe..78db42d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1700,9 +1700,34 @@ config RELOCATABLE
(CONFIG_PHYSICAL_START) is ignored.

config RANDOMIZE_BASE
- bool "Enable 64-bit relocation support (for KASLR)"
+ bool "Randomize the address of the kernel image"
depends on RELOCATABLE
+ depends on !HIBERNATION
default n
+ ---help---
+ Randomizes the physical and virtual address at which the
+ kernel image is decompressed, as a security feature that
+ deters exploit attempts relying on knowledge of the location
+ of kernel internals.
+
+ Entropy is generated using the RDRAND instruction if it
+ is supported. If not, then RDTSC is used, if supported. If
+ neither RDRAND nor RDTSC are supported, then no randomness
+ is introduced.
+
+ The kernel will be offset by up to RANDOMIZE_BASE_MAX_OFFSET,
+ and aligned according to PHYSICAL_ALIGN.
+
+config RANDOMIZE_BASE_MAX_OFFSET
+ hex "Maximum ASLR offset allowed"
+ depends on RANDOMIZE_BASE
+ default "0x10000000"
+ range 0x0 0x10000000
+ ---help---
+ Determines the maximal offset in bytes that will be applied to the
+ kernel when Address Space Layout Randomization (ASLR) is active.
+ Must be less than or equal to the actual physical memory on the
+ system. This must be a power of two.

# Relocation on x86 needs some additional build support
config X86_NEED_RELOCS
@@ -1711,7 +1736,7 @@ config X86_NEED_RELOCS

config PHYSICAL_ALIGN
hex "Alignment value to which kernel should be aligned"
- default "0x1000000"
+ default "0x200000"
range 0x2000 0x1000000
---help---
This value puts the alignment restrictions on physical address
diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
index d5331ee..4647e3f 100644
--- a/arch/x86/boot/compressed/aslr.c
+++ b/arch/x86/boot/compressed/aslr.c
@@ -1,19 +1,82 @@
#include "misc.h"

#ifdef CONFIG_RANDOMIZE_BASE
+#include <asm/msr.h>
+
+#include <asm/archrandom.h>
+static inline int rdrand(unsigned long *v)
+{
+ int ok;
+ asm volatile("1: " RDRAND_LONG "\n\t"
+ "jc 2f\n\t"
+ "decl %0\n\t"
+ "jnz 1b\n\t"
+ "2:"
+ : "=r" (ok), "=a" (*v)
+ : "0" (RDRAND_RETRY_LOOPS));
+ return ok;
+}
+
+static unsigned long get_random_long(void)
+{
+ if (has_cpuflag(X86_FEATURE_RDRAND)) {
+ unsigned long random;
+
+ debug_putstr("KASLR using RDRAND...\n");
+ if (rdrand(&random))
+ return random;
+ }
+
+ if (has_cpuflag(X86_FEATURE_TSC)) {
+ uint32_t raw;
+ unsigned long timer;
+
+ debug_putstr("KASLR using RDTSC...\n");
+ rdtscl(raw);
+
+ /* Repeat the low bits of rdtsc. */
+ timer = raw & 0xffff;
+ timer |= (timer << 16);
+#ifdef CONFIG_X86_64
+ timer |= (timer << 32) | (timer << 48);
+#endif
+
+ return timer;
+ }
+
+ debug_putstr("KASLR found no entropy source...\n");
+ return 0;
+}

unsigned char *choose_kernel_location(unsigned char *hint, unsigned long size)
{
unsigned char *choice = hint;
- unsigned long random;
+ unsigned long random, mask;

if (cmdline_find_option_bool("noaslr")) {
debug_putstr("KASLR disabled...\n");
goto out;
}

- /* XXX: choose random location. */
+ random = get_random_long();
+
+ /* Clip off top of the range. */
+ mask = CONFIG_RANDOMIZE_BASE_MAX_OFFSET - 1;
+ random &= mask;
+
+ /* XXX: Find an appropriate E820 hole, instead of adding hint. */
+ random += (unsigned long)hint;
+
+ /* XXX: Clip to E820 hole, instead of just using hint. */
+ mask = (unsigned long)hint + CONFIG_RANDOMIZE_BASE_MAX_OFFSET;
+ while (random + size > mask)
+ random >>= 1;
+
+ /* Clip off bottom of range (via alignment). */
+ mask = CONFIG_PHYSICAL_ALIGN - 1;
+ random &= ~mask;

+ choice = (unsigned char *)random;
out:
return choice;
}
--
1.7.9.5

2013-04-26 19:04:22

by Kees Cook

Subject: [PATCH 6/6] x86: kaslr: report kernel offset on panic

When the system panics, include the kernel offset in the report to assist
in debugging.

Signed-off-by: Kees Cook <[email protected]>
---
arch/x86/kernel/setup.c | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index fae9134..95a33b1 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -830,6 +830,18 @@ static void __init trim_low_memory_range(void)
}

/*
+ * Dump out kernel offset information on panic.
+ */
+static int
+dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p)
+{
+ pr_emerg("Kernel Offset: 0x%lx\n",
+ (unsigned long)&_text - __START_KERNEL);
+
+ return 0;
+}
+
+/*
* Determine if we were loaded by an EFI loader. If so, then we have also been
* passed the efi memmap, systab, etc., so we should use these data structures
* for initialization. Note, the efi init code path is determined by the
@@ -1249,3 +1261,15 @@ void __init i386_reserve_resources(void)
}

#endif /* CONFIG_X86_32 */
+
+static struct notifier_block kernel_offset_notifier = {
+ .notifier_call = dump_kernel_offset
+};
+
+static int __init register_kernel_offset_dumper(void)
+{
+ atomic_notifier_chain_register(&panic_notifier_list,
+ &kernel_offset_notifier);
+ return 0;
+}
+__initcall(register_kernel_offset_dumper);
--
1.7.9.5

2013-04-26 19:04:36

by Kees Cook

Subject: [PATCH 1/6] x86: kaslr: move ELF relocation handling to C

Moves the relocation handling into C, after decompression. Only
kernels that need relocation support will use the code. The new
CONFIG_RANDOMIZE_BASE does not yet do anything except turn on this logic
for 64-bit kernels.

Based on work by Neill Clift and Michael Davidson.

Signed-off-by: Kees Cook <[email protected]>
---
v2:
- made CONFIG_RANDOMIZE_BASE interactively selectable.
---
arch/x86/Kconfig | 11 +++--
arch/x86/Makefile | 8 ++--
arch/x86/boot/compressed/head_32.S | 31 ++------------
arch/x86/boot/compressed/head_64.S | 1 +
arch/x86/boot/compressed/misc.c | 77 +++++++++++++++++++++++++++++++++-
arch/x86/include/asm/page_32_types.h | 2 +
arch/x86/include/asm/page_64_types.h | 5 ---
arch/x86/include/asm/page_types.h | 6 +++
8 files changed, 100 insertions(+), 41 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 15b5cef..6f59afe 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1699,13 +1699,18 @@ config RELOCATABLE
it has been loaded at and the compile time physical address
(CONFIG_PHYSICAL_START) is ignored.

-# Relocation on x86-32 needs some additional build support
+config RANDOMIZE_BASE
+ bool "Enable 64-bit relocation support (for KASLR)"
+ depends on RELOCATABLE
+ default n
+
+# Relocation on x86 needs some additional build support
config X86_NEED_RELOCS
def_bool y
- depends on X86_32 && RELOCATABLE
+ depends on RANDOMIZE_BASE || (X86_32 && RELOCATABLE)

config PHYSICAL_ALIGN
- hex "Alignment value to which kernel should be aligned" if X86_32
+ hex "Alignment value to which kernel should be aligned"
default "0x1000000"
range 0x2000 0x1000000
---help---
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 5c47726..43f8cef 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -16,6 +16,10 @@ endif
# e.g.: obj-y += foo_$(BITS).o
export BITS

+ifdef CONFIG_X86_NEED_RELOCS
+ LDFLAGS_vmlinux := --emit-relocs
+endif
+
ifeq ($(CONFIG_X86_32),y)
BITS := 32
UTS_MACHINE := i386
@@ -25,10 +29,6 @@ ifeq ($(CONFIG_X86_32),y)
KBUILD_AFLAGS += $(biarch)
KBUILD_CFLAGS += $(biarch)

- ifdef CONFIG_RELOCATABLE
- LDFLAGS_vmlinux := --emit-relocs
- endif
-
KBUILD_CFLAGS += -msoft-float -mregparm=3 -freg-struct-return

# Never want PIC in a 32-bit kernel, prevent breakage with GCC built
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 1e3184f..5d6f689 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -181,8 +181,9 @@ relocated:
/*
* Do the decompression, and jump to the new kernel..
*/
- leal z_extract_offset_negative(%ebx), %ebp
/* push arguments for decompress_kernel: */
+ pushl $z_output_len /* decompressed length */
+ leal z_extract_offset_negative(%ebx), %ebp
pushl %ebp /* output address */
pushl $z_input_len /* input_len */
leal input_data(%ebx), %eax
@@ -191,33 +192,7 @@ relocated:
pushl %eax /* heap area */
pushl %esi /* real mode pointer */
call decompress_kernel
- addl $20, %esp
-
-#if CONFIG_RELOCATABLE
-/*
- * Find the address of the relocations.
- */
- leal z_output_len(%ebp), %edi
-
-/*
- * Calculate the delta between where vmlinux was compiled to run
- * and where it was actually loaded.
- */
- movl %ebp, %ebx
- subl $LOAD_PHYSICAL_ADDR, %ebx
- jz 2f /* Nothing to be done if loaded at compiled addr. */
-/*
- * Process relocations.
- */
-
-1: subl $4, %edi
- movl (%edi), %ecx
- testl %ecx, %ecx
- jz 2f
- addl %ebx, -__PAGE_OFFSET(%ebx, %ecx)
- jmp 1b
-2:
-#endif
+ addl $24, %esp

/*
* Jump to the decompressed kernel.
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index c1d383d..81ca174 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -340,6 +340,7 @@ relocated:
leaq input_data(%rip), %rdx /* input_data */
movl $z_input_len, %ecx /* input_len */
movq %rbp, %r8 /* output target address */
+ movq $z_output_len, %r9 /* decompressed length */
call decompress_kernel
popq %rsi

diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 7cb56c6..b756a04 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -267,6 +267,79 @@ static void error(char *x)
asm("hlt");
}

+#if CONFIG_X86_NEED_RELOCS
+static void handle_relocations(void *output, unsigned long output_len)
+{
+ int *reloc;
+ unsigned long delta, map, ptr;
+ unsigned long min_addr = (unsigned long)output;
+ unsigned long max_addr = min_addr + output_len;
+
+ /*
+ * Calculate the delta between where vmlinux was linked to load
+ * and where it was actually loaded.
+ */
+ delta = min_addr - LOAD_PHYSICAL_ADDR;
+ if (!delta) {
+ debug_putstr("No relocation needed... ");
+ return;
+ }
+ debug_putstr("Performing relocations... ");
+
+ /*
+ * The kernel contains a table of relocation addresses. Those
+ * addresses have the final load address of the kernel in virtual
+ * memory. We are currently working in the self map. So we need to
+ * create an adjustment for kernel memory addresses to the self map.
+ * This will involve subtracting out the base address of the kernel.
+ */
+ map = delta - __START_KERNEL_map;
+
+ /*
+ * Process relocations: 32 bit relocations first then 64 bit after.
+ * Two sets of binary relocations are added to the end of the kernel
+ * before compression. Each relocation table entry is the kernel
+ * address of the location which needs to be updated stored as a
+ * 32-bit value which is sign extended to 64 bits.
+ *
+ * Format is:
+ *
+ * kernel bits...
+ * 0 - zero terminator for 64 bit relocations
+ * 64 bit relocation repeated
+ * 0 - zero terminator for 32 bit relocations
+ * 32 bit relocation repeated
+ *
+ * So we work backwards from the end of the decompressed image.
+ */
+ for (reloc = output + output_len - sizeof(*reloc); *reloc; reloc--) {
+ int extended = *reloc;
+ extended += map;
+
+ ptr = (unsigned long)extended;
+ if (ptr < min_addr || ptr > max_addr)
+ error("32-bit relocation outside of kernel!\n");
+
+ *(uint32_t *)ptr += delta;
+ }
+#ifdef CONFIG_X86_64
+ for (reloc--; *reloc; reloc--) {
+ long extended = *reloc;
+ extended += map;
+
+ ptr = (unsigned long)extended;
+ if (ptr < min_addr || ptr > max_addr)
+ error("64-bit relocation outside of kernel!\n");
+
+ *(uint64_t *)ptr += delta;
+ }
+#endif
+}
+#else
+static inline void handle_relocations(void *output, unsigned long output_len)
+{ }
+#endif
+
static void parse_elf(void *output)
{
#ifdef CONFIG_X86_64
@@ -321,7 +394,8 @@ static void parse_elf(void *output)
asmlinkage void decompress_kernel(void *rmode, memptr heap,
unsigned char *input_data,
unsigned long input_len,
- unsigned char *output)
+ unsigned char *output,
+ unsigned long output_len)
{
real_mode = rmode;

@@ -361,6 +435,7 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
debug_putstr("\nDecompressing Linux... ");
decompress(input_data, input_len, NULL, NULL, output, NULL, error);
parse_elf(output);
+ handle_relocations(output, output_len);
debug_putstr("done.\nBooting the kernel.\n");
return;
}
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
index ef17af0..f48b17d 100644
--- a/arch/x86/include/asm/page_32_types.h
+++ b/arch/x86/include/asm/page_32_types.h
@@ -15,6 +15,8 @@
*/
#define __PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)

+#define __START_KERNEL_map __PAGE_OFFSET
+
#define THREAD_SIZE_ORDER 1
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)

diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 8b491e6..203e98a 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -32,11 +32,6 @@
*/
#define __PAGE_OFFSET _AC(0xffff880000000000, UL)

-#define __PHYSICAL_START ((CONFIG_PHYSICAL_START + \
- (CONFIG_PHYSICAL_ALIGN - 1)) & \
- ~(CONFIG_PHYSICAL_ALIGN - 1))
-
-#define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START)
#define __START_KERNEL_map _AC(0xffffffff80000000, UL)

/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index 54c9787..086c2fa 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -33,6 +33,12 @@
(((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \
VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)

+#define __PHYSICAL_START ((CONFIG_PHYSICAL_START + \
+ (CONFIG_PHYSICAL_ALIGN - 1)) & \
+ ~(CONFIG_PHYSICAL_ALIGN - 1))
+
+#define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START)
+
#ifdef CONFIG_X86_64
#include <asm/page_64_types.h>
#else
--
1.7.9.5

2013-04-26 19:05:03

by Kees Cook

Subject: [PATCH 5/6] x86: kaslr: select memory region from e820 maps

This chooses the largest contiguous RAM region for the KASLR offset
to live in.

Signed-off-by: Kees Cook <[email protected]>
---
v2:
- make sure to exclude e820 regions outside the 32-bit memory range.
---
arch/x86/boot/compressed/aslr.c | 47 ++++++++++++++++++++++++++++++++++-----
1 file changed, 41 insertions(+), 6 deletions(-)

diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
index 4647e3f..3d3789e 100644
--- a/arch/x86/boot/compressed/aslr.c
+++ b/arch/x86/boot/compressed/aslr.c
@@ -2,6 +2,7 @@

#ifdef CONFIG_RANDOMIZE_BASE
#include <asm/msr.h>
+#include <asm/e820.h>

#include <asm/archrandom.h>
static inline int rdrand(unsigned long *v)
@@ -48,28 +49,62 @@ static unsigned long get_random_long(void)
return 0;
}

+int largest_ram_region(unsigned long *start, unsigned long *size)
+{
+ int i, rc = 0;
+
+ *size = 0;
+ for (i = 0; i < real_mode->e820_entries; i++) {
+ struct e820entry *entry = &real_mode->e820_map[i];
+
+ if (entry->type != E820_RAM)
+ continue;
+
+ /* XXX: Handle arbitrary physical location. */
+ if (entry->addr > UINT_MAX)
+ continue;
+
+ if (entry->size > *size) {
+ *size = entry->size;
+ *start = entry->addr;
+ rc = 1;
+ }
+ }
+ return rc;
+}
+
unsigned char *choose_kernel_location(unsigned char *hint, unsigned long size)
{
unsigned char *choice = hint;
unsigned long random, mask;
+ unsigned long addr, length;

if (cmdline_find_option_bool("noaslr")) {
debug_putstr("KASLR disabled...\n");
goto out;
}

+ /* Find an appropriate E820 entry. */
+ if (!largest_ram_region(&addr, &length)) {
+ debug_putstr("KASLR could not find suitable E820 region...\n");
+ goto out;
+ }
+
random = get_random_long();

- /* Clip off top of the range. */
+ /* XXX: Rework page tables to handle arbitrary physical location. */
mask = CONFIG_RANDOMIZE_BASE_MAX_OFFSET - 1;
random &= mask;

- /* XXX: Find an appropriate E820 hole, instead of adding hint. */
- random += (unsigned long)hint;
+ /* Clip to E820 entry size. */
+ while (random > length)
+ random >>= 1;
+
+ /* Offset the target. */
+ random += addr;

- /* XXX: Clip to E820 hole, instead of just using hint. */
- mask = (unsigned long)hint + CONFIG_RANDOMIZE_BASE_MAX_OFFSET;
- while (random + size > mask)
+ /* Clip end to E820 entry size. */
+ while (random + size > addr + length)
random >>= 1;

/* Clip off bottom of range (via alignment). */
--
1.7.9.5

2013-04-26 19:04:59

by Kees Cook

Subject: [PATCH 3/6] x86: kaslr: return location from decompress_kernel

This allows decompress_kernel to return a new location for the kernel to
be relocated to. With CONFIG_RANDOMIZE_BASE, the choose_kernel_location
routine will select a new location at which to decompress the kernel,
though it is presently a no-op. Logic for bypassing this routine with
"noaslr" on the kernel command line is also included.

Signed-off-by: Kees Cook <[email protected]>
---
Documentation/kernel-parameters.txt | 4 ++++
arch/x86/boot/compressed/Makefile | 2 +-
arch/x86/boot/compressed/aslr.c | 21 +++++++++++++++++++++
arch/x86/boot/compressed/cmdline.c | 2 +-
arch/x86/boot/compressed/head_32.S | 2 +-
arch/x86/boot/compressed/head_64.S | 2 +-
arch/x86/boot/compressed/misc.c | 7 +++++--
arch/x86/boot/compressed/misc.h | 22 ++++++++++++++++------
8 files changed, 50 insertions(+), 12 deletions(-)
create mode 100644 arch/x86/boot/compressed/aslr.c

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 8ccbf27..eb1c62c 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1862,6 +1862,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
noapic [SMP,APIC] Tells the kernel to not make use of any
IOAPICs that may be present in the system.

+ noaslr [X86]
+ Disable kernel base offset ASLR (Address Space
+ Layout Randomization) if built into the kernel.
+
noautogroup Disable scheduler automatic task group creation.

nobats [PPC] Do not use BATs for mapping kernel lowmem
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index d854390..fa2629b 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -26,7 +26,7 @@ HOST_EXTRACFLAGS += -I$(srctree)/tools/include

VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
$(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \
- $(obj)/piggy.o $(obj)/cpuflags.o
+ $(obj)/piggy.o $(obj)/cpuflags.o $(obj)/aslr.o

$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone

diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
new file mode 100644
index 0000000..d5331ee
--- /dev/null
+++ b/arch/x86/boot/compressed/aslr.c
@@ -0,0 +1,21 @@
+#include "misc.h"
+
+#ifdef CONFIG_RANDOMIZE_BASE
+
+unsigned char *choose_kernel_location(unsigned char *hint, unsigned long size)
+{
+ unsigned char *choice = hint;
+ unsigned long random;
+
+ if (cmdline_find_option_bool("noaslr")) {
+ debug_putstr("KASLR disabled...\n");
+ goto out;
+ }
+
+ /* XXX: choose random location. */
+
+out:
+ return choice;
+}
+
+#endif /* CONFIG_RANDOMIZE_BASE */
diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c
index bffd73b..b68e303 100644
--- a/arch/x86/boot/compressed/cmdline.c
+++ b/arch/x86/boot/compressed/cmdline.c
@@ -1,6 +1,6 @@
#include "misc.h"

-#ifdef CONFIG_EARLY_PRINTK
+#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE

static unsigned long fs;
static inline void set_fs(unsigned long seg)
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 5d6f689..a48631a 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -198,7 +198,7 @@ relocated:
* Jump to the decompressed kernel.
*/
xorl %ebx, %ebx
- jmp *%ebp
+ jmp *%eax

/*
* Stack and heap for uncompression
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 81ca174..bfd9be8 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -347,7 +347,7 @@ relocated:
/*
* Jump to the decompressed kernel.
*/
- jmp *%rbp
+ jmp *%rax

.code32
no_longmode:
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index b756a04..15a3ea8 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -391,7 +391,7 @@ static void parse_elf(void *output)
free(phdrs);
}

-asmlinkage void decompress_kernel(void *rmode, memptr heap,
+asmlinkage void *decompress_kernel(void *rmode, memptr heap,
unsigned char *input_data,
unsigned long input_len,
unsigned char *output,
@@ -418,6 +418,9 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
free_mem_ptr = heap; /* Heap */
free_mem_end_ptr = heap + BOOT_HEAP_SIZE;

+ output = choose_kernel_location(output, output_len);
+
+ /* Validate memory location choices. */
if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1))
error("Destination address inappropriately aligned");
#ifdef CONFIG_X86_64
@@ -437,5 +440,5 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
parse_elf(output);
handle_relocations(output, output_len);
debug_putstr("done.\nBooting the kernel.\n");
- return;
+ return output;
}
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 674019d..2a5c7aa 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -39,23 +39,33 @@ static inline void debug_putstr(const char *s)

#endif

-#ifdef CONFIG_EARLY_PRINTK
-
+#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE
/* cmdline.c */
int cmdline_find_option(const char *option, char *buffer, int bufsize);
int cmdline_find_option_bool(const char *option);
+#endif
+
+#if CONFIG_RANDOMIZE_BASE
+/* aslr.c */
+unsigned char *choose_kernel_location(unsigned char *hint, unsigned long size);
+/* cpuflags.c */
+bool has_cpuflag(int flag);
+#else
+static inline unsigned char *choose_kernel_location(unsigned char *hint,
+ unsigned long size)
+{
+ return hint;
+}
+#endif

+#ifdef CONFIG_EARLY_PRINTK
/* early_serial_console.c */
extern int early_serial_base;
void console_init(void);
-
#else
-
-/* early_serial_console.c */
static const int early_serial_base;
static inline void console_init(void)
{ }
-
#endif

#endif
--
1.7.9.5

2013-04-26 19:05:59

by Kees Cook

Subject: [PATCH 2/6] x86: kaslr: move CPU flags out of cpucheck

Refactor the CPU flags handling out of the cpucheck routines so that
they can be reused by the future ASLR routines (in order to detect CPU
features like RDRAND and RDTSC).

This reworks has_eflag() and has_fpu() so they can be used by both 32-bit
and 64-bit code, and refactors the cpuid calls to make them PIC-safe on
32-bit.

Signed-off-by: Kees Cook <[email protected]>
---
v2:
- clean up has_eflags and has_fpu to be 64-bit sane, thanks to HPA.
---
arch/x86/boot/Makefile | 2 +-
arch/x86/boot/boot.h | 10 +---
arch/x86/boot/compressed/Makefile | 2 +-
arch/x86/boot/compressed/cpuflags.c | 12 +++++
arch/x86/boot/cpucheck.c | 86 -----------------------------
arch/x86/boot/cpuflags.c | 101 +++++++++++++++++++++++++++++++++++
arch/x86/boot/cpuflags.h | 19 +++++++
7 files changed, 135 insertions(+), 97 deletions(-)
create mode 100644 arch/x86/boot/compressed/cpuflags.c
create mode 100644 arch/x86/boot/cpuflags.c
create mode 100644 arch/x86/boot/cpuflags.h

diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 379814b..0da2e37 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -20,7 +20,7 @@ targets := vmlinux.bin setup.bin setup.elf bzImage
targets += fdimage fdimage144 fdimage288 image.iso mtools.conf
subdir- := compressed

-setup-y += a20.o bioscall.o cmdline.o copy.o cpu.o cpucheck.o
+setup-y += a20.o bioscall.o cmdline.o copy.o cpu.o cpuflags.o cpucheck.o
setup-y += early_serial_console.o edd.o header.o main.o mca.o memory.o
setup-y += pm.o pmjump.o printf.o regs.o string.o tty.o video.o
setup-y += video-mode.o version.o
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index 5b75319..59c5ada 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -26,9 +26,8 @@
#include <asm/boot.h>
#include <asm/setup.h>
#include "bitops.h"
-#include <asm/cpufeature.h>
-#include <asm/processor-flags.h>
#include "ctype.h"
+#include "cpuflags.h"

/* Useful macros */
#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
@@ -307,14 +306,7 @@ static inline int cmdline_find_option_bool(const char *option)
return __cmdline_find_option_bool(cmd_line_ptr, option);
}

-
/* cpu.c, cpucheck.c */
-struct cpu_features {
- int level; /* Family, or 64 for x86-64 */
- int model;
- u32 flags[NCAPINTS];
-};
-extern struct cpu_features cpu;
int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr);
int validate_cpu(void);

diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 5ef205c..d854390 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -26,7 +26,7 @@ HOST_EXTRACFLAGS += -I$(srctree)/tools/include

VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
$(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \
- $(obj)/piggy.o
+ $(obj)/piggy.o $(obj)/cpuflags.o

$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone

diff --git a/arch/x86/boot/compressed/cpuflags.c b/arch/x86/boot/compressed/cpuflags.c
new file mode 100644
index 0000000..931cba6
--- /dev/null
+++ b/arch/x86/boot/compressed/cpuflags.c
@@ -0,0 +1,12 @@
+#ifdef CONFIG_RANDOMIZE_BASE
+
+#include "../cpuflags.c"
+
+bool has_cpuflag(int flag)
+{
+ get_flags();
+
+ return test_bit(flag, cpu.flags);
+}
+
+#endif
diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c
index 4d3ff03..e1f3c16 100644
--- a/arch/x86/boot/cpucheck.c
+++ b/arch/x86/boot/cpucheck.c
@@ -28,8 +28,6 @@
#include <asm/required-features.h>
#include <asm/msr-index.h>

-struct cpu_features cpu;
-static u32 cpu_vendor[3];
static u32 err_flags[NCAPINTS];

static const int req_level = CONFIG_X86_MINIMUM_CPU_FAMILY;
@@ -69,90 +67,6 @@ static int is_transmeta(void)
cpu_vendor[2] == A32('M', 'x', '8', '6');
}

-static int has_fpu(void)
-{
- u16 fcw = -1, fsw = -1;
- u32 cr0;
-
- asm("movl %%cr0,%0" : "=r" (cr0));
- if (cr0 & (X86_CR0_EM|X86_CR0_TS)) {
- cr0 &= ~(X86_CR0_EM|X86_CR0_TS);
- asm volatile("movl %0,%%cr0" : : "r" (cr0));
- }
-
- asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
- : "+m" (fsw), "+m" (fcw));
-
- return fsw == 0 && (fcw & 0x103f) == 0x003f;
-}
-
-static int has_eflag(u32 mask)
-{
- u32 f0, f1;
-
- asm("pushfl ; "
- "pushfl ; "
- "popl %0 ; "
- "movl %0,%1 ; "
- "xorl %2,%1 ; "
- "pushl %1 ; "
- "popfl ; "
- "pushfl ; "
- "popl %1 ; "
- "popfl"
- : "=&r" (f0), "=&r" (f1)
- : "ri" (mask));
-
- return !!((f0^f1) & mask);
-}
-
-static void get_flags(void)
-{
- u32 max_intel_level, max_amd_level;
- u32 tfms;
-
- if (has_fpu())
- set_bit(X86_FEATURE_FPU, cpu.flags);
-
- if (has_eflag(X86_EFLAGS_ID)) {
- asm("cpuid"
- : "=a" (max_intel_level),
- "=b" (cpu_vendor[0]),
- "=d" (cpu_vendor[1]),
- "=c" (cpu_vendor[2])
- : "a" (0));
-
- if (max_intel_level >= 0x00000001 &&
- max_intel_level <= 0x0000ffff) {
- asm("cpuid"
- : "=a" (tfms),
- "=c" (cpu.flags[4]),
- "=d" (cpu.flags[0])
- : "a" (0x00000001)
- : "ebx");
- cpu.level = (tfms >> 8) & 15;
- cpu.model = (tfms >> 4) & 15;
- if (cpu.level >= 6)
- cpu.model += ((tfms >> 16) & 0xf) << 4;
- }
-
- asm("cpuid"
- : "=a" (max_amd_level)
- : "a" (0x80000000)
- : "ebx", "ecx", "edx");
-
- if (max_amd_level >= 0x80000001 &&
- max_amd_level <= 0x8000ffff) {
- u32 eax = 0x80000001;
- asm("cpuid"
- : "+a" (eax),
- "=c" (cpu.flags[6]),
- "=d" (cpu.flags[1])
- : : "ebx");
- }
- }
-}
-
/* Returns a bitmask of which words we have error bits in */
static int check_flags(void)
{
diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c
new file mode 100644
index 0000000..e0cb1c3
--- /dev/null
+++ b/arch/x86/boot/cpuflags.c
@@ -0,0 +1,101 @@
+#include <linux/types.h>
+#include "bitops.h"
+
+#include <asm/processor-flags.h>
+#include <asm/required-features.h>
+#include <asm/msr-index.h>
+#include "cpuflags.h"
+
+struct cpu_features cpu;
+u32 cpu_vendor[3];
+
+static bool loaded_flags;
+
+static int has_fpu(void)
+{
+ u16 fcw = -1, fsw = -1;
+ unsigned long cr0;
+
+ asm volatile("mov %%cr0,%0" : "=r" (cr0));
+ if (cr0 & (X86_CR0_EM|X86_CR0_TS)) {
+ cr0 &= ~(X86_CR0_EM|X86_CR0_TS);
+ asm volatile("mov %0,%%cr0" : : "r" (cr0));
+ }
+
+ asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
+ : "+m" (fsw), "+m" (fcw));
+
+ return fsw == 0 && (fcw & 0x103f) == 0x003f;
+}
+
+int has_eflag(unsigned long mask)
+{
+ unsigned long f0, f1;
+
+ asm volatile("pushf \n\t"
+ "pushf \n\t"
+ "pop %0 \n\t"
+ "mov %0,%1 \n\t"
+ "xor %2,%1 \n\t"
+ "push %1 \n\t"
+ "popf \n\t"
+ "pushf \n\t"
+ "pop %1 \n\t"
+ "popf"
+ : "=&r" (f0), "=&r" (f1)
+ : "ri" (mask));
+
+ return !!((f0^f1) & mask);
+}
+
+static inline void cpuid(u32 id, u32 *a, u32 *b, u32 *c, u32 *d)
+{
+ /* Handle x86_32 PIC using ebx. */
+ asm volatile("movl %%ebx, %%edi \n\t"
+ "cpuid \n\t"
+ "xchgl %%edi, %%ebx\n\t"
+ : "=a" (*a),
+ "=D" (*b),
+ "=c" (*c),
+ "=d" (*d)
+ : "a" (id)
+ );
+}
+
+void get_flags(void)
+{
+ u32 max_intel_level, max_amd_level;
+ u32 tfms;
+ u32 ignored;
+
+ if (loaded_flags)
+ return;
+ loaded_flags = true;
+
+ if (has_fpu())
+ set_bit(X86_FEATURE_FPU, cpu.flags);
+
+ if (has_eflag(X86_EFLAGS_ID)) {
+ cpuid(0x0, &max_intel_level, &cpu_vendor[0], &cpu_vendor[2],
+ &cpu_vendor[1]);
+
+ if (max_intel_level >= 0x00000001 &&
+ max_intel_level <= 0x0000ffff) {
+ cpuid(0x1, &tfms, &ignored, &cpu.flags[4],
+ &cpu.flags[0]);
+ cpu.level = (tfms >> 8) & 15;
+ cpu.model = (tfms >> 4) & 15;
+ if (cpu.level >= 6)
+ cpu.model += ((tfms >> 16) & 0xf) << 4;
+ }
+
+ cpuid(0x80000000, &max_amd_level, &ignored, &ignored,
+ &ignored);
+
+ if (max_amd_level >= 0x80000001 &&
+ max_amd_level <= 0x8000ffff) {
+ cpuid(0x80000001, &ignored, &ignored, &cpu.flags[6],
+ &cpu.flags[1]);
+ }
+ }
+}
diff --git a/arch/x86/boot/cpuflags.h b/arch/x86/boot/cpuflags.h
new file mode 100644
index 0000000..9bb4e25
--- /dev/null
+++ b/arch/x86/boot/cpuflags.h
@@ -0,0 +1,19 @@
+#ifndef BOOT_CPUFLAGS_H
+#define BOOT_CPUFLAGS_H
+
+#include <asm/cpufeature.h>
+#include <asm/processor-flags.h>
+
+struct cpu_features {
+ int level; /* Family, or 64 for x86-64 */
+ int model;
+ u32 flags[NCAPINTS];
+};
+
+extern struct cpu_features cpu;
+extern u32 cpu_vendor[3];
+
+int has_eflag(unsigned long mask);
+void get_flags(void);
+
+#endif
--
1.7.9.5

2013-04-26 20:14:12

by George Spelvin

Subject: Re: [PATCH 5/6] x86: kaslr: select memory region from e820 maps

As a logic simplification, the following gets rid of one variable
that's simply not needed. (And adds a "static" declaration that seems
to be appropriate):

static bool largest_ram_region(unsigned long *start, unsigned long *size)
{
        int i;

        *size = 0;
        for (i = 0; i < real_mode->e820_entries; i++) {
                struct e820entry *entry = &real_mode->e820_map[i];

                if (entry->type != E820_RAM)
                        continue;

                if (entry->size > *size) {
                        *size = entry->size;
                        *start = entry->addr;
                }
        }
        return *size != 0;
}

but I might instead do it as:

struct e820entry const *largest_ram_region(void)
{
        struct e820entry const *rc = NULL;
        unsigned long size = 0;
        int i;

        for (i = 0; i < real_mode->e820_entries; i++) {
                struct e820entry const *entry = &real_mode->e820_map[i];

                if (entry->type == E820_RAM && entry->size > size) {
                        size = entry->size;
                        rc = entry;
                }
        }
        return rc;
}

... with appropriate adjustments to the caller. Anyway,
your choice.

2013-04-26 21:47:55

by H. Peter Anvin

Subject: Re: [PATCH 2/6] x86: kaslr: move CPU flags out of cpucheck

On 04/26/2013 12:03 PM, Kees Cook wrote:
> +
> +static inline void cpuid(u32 id, u32 *a, u32 *b, u32 *c, u32 *d)
> +{
> + /* Handle x86_32 PIC using ebx. */
> + asm volatile("movl %%ebx, %%edi \n\t"
> + "cpuid \n\t"
> + "xchgl %%edi, %%ebx\n\t"
> + : "=a" (*a),
> + "=D" (*b),
> + "=c" (*c),
> + "=d" (*d)
> + : "a" (id)
> + );
> +}

Please don't constrain registers unnecessarily.

You can use "=r" there and let gcc assign whatever free register it pleases.

You can also limit that to only:

#if defined(__i386__) && defined(__PIC__)

-hpa

2013-04-26 21:48:09

by H. Peter Anvin

Subject: Re: [PATCH 3/6] x86: kaslr: return location from decompress_kernel

On 04/26/2013 12:03 PM, Kees Cook wrote:
> This allows decompress_kernel to return a new location for the kernel to
> be relocated to. With CONFIG_RANDOMIZE_BASE, the choose_kernel_location
> routine will select a new location at which to decompress the kernel,
> though it is presently a no-op. Logic for bypassing this routine with
> "noaslr" on the kernel command line is also included.
>
> Signed-off-by: Kees Cook <[email protected]>
> ---
> Documentation/kernel-parameters.txt | 4 ++++
> arch/x86/boot/compressed/Makefile | 2 +-
> arch/x86/boot/compressed/aslr.c | 21 +++++++++++++++++++++
> arch/x86/boot/compressed/cmdline.c | 2 +-
> arch/x86/boot/compressed/head_32.S | 2 +-
> arch/x86/boot/compressed/head_64.S | 2 +-
> arch/x86/boot/compressed/misc.c | 7 +++++--
> arch/x86/boot/compressed/misc.h | 22 ++++++++++++++++------
> 8 files changed, 50 insertions(+), 12 deletions(-)
> create mode 100644 arch/x86/boot/compressed/aslr.c
>
> diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
> index 8ccbf27..eb1c62c 100644
> --- a/Documentation/kernel-parameters.txt
> +++ b/Documentation/kernel-parameters.txt
> @@ -1862,6 +1862,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
> noapic [SMP,APIC] Tells the kernel to not make use of any
> IOAPICs that may be present in the system.
>
> + noaslr [X86]
> + Disable kernel base offset ASLR (Address Space
> + Layout Randomization) if built into the kernel.
> +
> noautogroup Disable scheduler automatic task group creation.
>
> nobats [PPC] Do not use BATs for mapping kernel lowmem

Calling it "nokaslr" might be useful to note that it is specifically
about the kernel.

-hpa

2013-04-26 21:50:56

by H. Peter Anvin

Subject: Re: [PATCH 4/6] x86: kaslr: select random base offset

On 04/26/2013 12:03 PM, Kees Cook wrote:
> +
> +static unsigned long get_random_long(void)
> +{
> + if (has_cpuflag(X86_FEATURE_RDRAND)) {
> + unsigned long random;
> +
> + debug_putstr("KASLR using RDRAND...\n");
> + if (rdrand(&random))
> + return random;
> + }
> +
> + if (has_cpuflag(X86_FEATURE_TSC)) {
> + uint32_t raw;
> + unsigned long timer;
> +
> + debug_putstr("KASLR using RDTSC...\n");
> + rdtscl(raw);
> +
> + /* Repeat the low bits of rdtsc. */
> + timer = raw & 0xffff;
> + timer |= (timer << 16);
> +#ifdef CONFIG_X86_64
> + timer |= (timer << 32) | (timer << 48);
> +#endif
> +

This seems like a very odd thing to do. If you want to scramble bits,
it would make more sense to do a multiply -- or much better, a
*circular* multiply -- with a large constant.

> + return timer;
> + }
> +
> + debug_putstr("KASLR found no entropy source...\n");
> + return 0;
> +}
>

It might be safe to assume that anything old enough to lack RDTSC
(basically a 486) will have an 8254, so you could fall back to reading
the 8254 counter register.

-hpa

2013-04-26 21:51:56

by Yinghai Lu

Subject: Re: [PATCH 5/6] x86: kaslr: select memory region from e820 maps

On Fri, Apr 26, 2013 at 12:03 PM, Kees Cook <[email protected]> wrote:
> This chooses the largest contiguous RAM region for the KASLR offset
> to live in.
>
> Signed-off-by: Kees Cook <[email protected]>
> ---
> v2:
> - make sure to exclude e820 regions outside the 32-bit memory range.

Do you need to exclude the ranges used for the initrd, and possibly the
command line and boot_params?

Yinghai

2013-04-26 22:01:41

by Kees Cook

Subject: Re: [PATCH 5/6] x86: kaslr: select memory region from e820 maps

On Fri, Apr 26, 2013 at 2:51 PM, Yinghai Lu <[email protected]> wrote:
> On Fri, Apr 26, 2013 at 12:03 PM, Kees Cook <[email protected]> wrote:
>> This chooses the largest contiguous RAM region for the KASLR offset
>> to live in.
>>
>> Signed-off-by: Kees Cook <[email protected]>
>> ---
>> v2:
>> - make sure to exclude e820 regions outside the 32-bit memory range.
>
> Do you need to exclude the ranges used for the initrd, and possibly the
> command line and boot_params?

Yeah, and while doing a stress test here, I realized there's another
problem. In the original version of this, the stack and heap are set
up after relocation. In the C port, they're set up before, so there's
even more to avoid. To illustrate... here's a CONFIG_RELOCATABLE=n
boot:

LOAD_PHYS:0x0000000001000000
input: 0x0000000001dfe24d-0x00000000023db865
output: 0x0000000001000000-0x00000000023c98c0
heap: 0x00000000023e0740-0x00000000023e8740
stack: 0x00000000023ec698
chosen: 0x0000000001000000

(stack is just cheating and reporting sp in decompress_kernel)

And a CONFIG_RELOCATABLE=y and "noaslr" boot:

LOAD_PHYS:0x0000000001000000
input: 0x000000000108b25e-0x00000000016b3e96
output: 0x0000000000200000-0x00000000016a1db8
heap: 0x00000000016b9600-0x00000000016c1600
stack: 0x00000000016c5558
chosen: 0x0000000000200000

In that case, it's just so far under LOAD_PHYSICAL_ADDR that it's
safe. But if KASLR picks an area overlapping input, heap, or stack
it's hosed. :)
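
Presumably the fix is to collect all the occupied ranges (input,
output, heap, stack, initrd, command line) and reject any random
choice that intersects one of them. Roughly (a sketch only; the names
here are illustrative, not part of this series):

        struct mem_vector {
                unsigned long start;
                unsigned long size;
        };

        /* Half-open ranges [start, start + size) overlap iff each
         * one begins before the other ends. */
        static int mem_overlaps(const struct mem_vector *a,
                                const struct mem_vector *b)
        {
                if (a->start + a->size <= b->start)
                        return 0;
                if (b->start + b->size <= a->start)
                        return 0;
                return 1;
        }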

-Kees

--
Kees Cook
Chrome OS Security

2013-04-26 22:01:54

by H. Peter Anvin

Subject: Re: [PATCH 5/6] x86: kaslr: select memory region from e820 maps

On 04/26/2013 02:51 PM, Yinghai Lu wrote:
> On Fri, Apr 26, 2013 at 12:03 PM, Kees Cook <[email protected]> wrote:
>> This chooses the largest contiguous RAM region for the KASLR offset
>> to live in.
>>
>> Signed-off-by: Kees Cook <[email protected]>
>> ---
>> v2:
>> - make sure to exclude e820 regions outside the 32-bit memory range.
>
> Do you need to exclude the ranges used for the initrd, and possibly the
> command line and boot_params?
>

Yes, those would have to be excluded. For correctness the mem= and
memmap= options should also be taken into account.

-hpa

2013-04-26 22:13:10

by Borislav Petkov

Subject: Re: [PATCH 6/6] x86: kaslr: report kernel offset on panic

On Fri, Apr 26, 2013 at 12:03:25PM -0700, Kees Cook wrote:
> When the system panics, include the kernel offset in the report to assist
> in debugging.
>
> Signed-off-by: Kees Cook <[email protected]>
> ---
> arch/x86/kernel/setup.c | 24 ++++++++++++++++++++++++
> 1 file changed, 24 insertions(+)
>
> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> index fae9134..95a33b1 100644
> --- a/arch/x86/kernel/setup.c
> +++ b/arch/x86/kernel/setup.c
> @@ -830,6 +830,18 @@ static void __init trim_low_memory_range(void)
> }
>
> /*
> + * Dump out kernel offset information on panic.
> + */
> +static int
> +dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p)
> +{
> + pr_emerg("Kernel Offset: 0x%lx\n",
> + (unsigned long)&_text - __START_KERNEL);

So what's wrong with subtracting the offset from the function addresses
on the stack so that traces can show the addresses as they are in
vmlinux, completely agnostic of any randomization?
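
I.e., given the printed offset, mapping a randomized address back to
its vmlinux link-time address is just a subtraction. A sketch of the
arithmetic only (not proposed code):

        /* offset is the same value the panic notifier prints. */
        static unsigned long vmlinux_addr(unsigned long runtime_addr)
        {
                unsigned long offset = (unsigned long)&_text - __START_KERNEL;

                return runtime_addr - offset;
        }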

--
Regards/Gruss,
Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

2013-04-26 22:14:44

by H. Peter Anvin

Subject: Re: [PATCH 2/6] x86: kaslr: move CPU flags out of cpucheck

On 04/26/2013 02:47 PM, H. Peter Anvin wrote:
> On 04/26/2013 12:03 PM, Kees Cook wrote:
>> +
>> +static inline void cpuid(u32 id, u32 *a, u32 *b, u32 *c, u32 *d)
>> +{
>> + /* Handle x86_32 PIC using ebx. */
>> + asm volatile("movl %%ebx, %%edi \n\t"
>> + "cpuid \n\t"
>> + "xchgl %%edi, %%ebx\n\t"
>> + : "=a" (*a),
>> + "=D" (*b),
>> + "=c" (*c),
>> + "=d" (*d)
>> + : "a" (id)
>> + );
>> +}
>
> Please don't constrain registers unnecessarily.
>
> You can use "=r" there and let gcc assign whatever free register it pleases.
>
> You can also limit that to only:
>
> #if defined(__i386__) && defined(__PIC__)
>

How is this for a "beauty":


#if defined(__i386__) && defined(__PIC__)
# define EBX_REG "=r"
#else
# define EBX_REG "=b"
#endif

        asm volatile(".ifnc %%ebx,%3 ; movl %%ebx,%3 ; .endif ; "
                     "cpuid ; "
                     ".ifnc %%ebx,%3 ; xchgl %%ebx,%3 ; .endif"
                     : "=a" (*a), "=c" (*c), "=d" (*d),
                       EBX_REG (*b)
                     : "a" (leaf), "c" (subleaf));

2013-04-26 22:15:49

by H. Peter Anvin

Subject: Re: [PATCH 6/6] x86: kaslr: report kernel offset on panic

On 04/26/2013 03:13 PM, Borislav Petkov wrote:
> On Fri, Apr 26, 2013 at 12:03:25PM -0700, Kees Cook wrote:
>> When the system panics, include the kernel offset in the report to assist
>> in debugging.
>>
>> Signed-off-by: Kees Cook <[email protected]>
>> ---
>> arch/x86/kernel/setup.c | 24 ++++++++++++++++++++++++
>> 1 file changed, 24 insertions(+)
>>
>> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
>> index fae9134..95a33b1 100644
>> --- a/arch/x86/kernel/setup.c
>> +++ b/arch/x86/kernel/setup.c
>> @@ -830,6 +830,18 @@ static void __init trim_low_memory_range(void)
>> }
>>
>> /*
>> + * Dump out kernel offset information on panic.
>> + */
>> +static int
>> +dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p)
>> +{
>> + pr_emerg("Kernel Offset: 0x%lx\n",
>> + (unsigned long)&_text - __START_KERNEL);
>
> So what's wrong with subtracting the offset from the function addresses
> on the stack so that traces can show the addresses as they are in
> vmlinux, completely agnostic of any randomization?
>

That really makes it harder to figure out what the heck the register
content may mean...

-hpa

2013-04-26 22:19:23

by Borislav Petkov

Subject: Re: [PATCH 6/6] x86: kaslr: report kernel offset on panic

On Fri, Apr 26, 2013 at 03:15:10PM -0700, H. Peter Anvin wrote:
> That really makes it harder to figure out what the heck the register
> content may mean...

So we need a goddam script to massage oopses now, if kaslr is enabled.

:-(

--
Regards/Gruss,
Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

2013-04-29 01:25:11

by James Morris

Subject: Re: [kernel-hardening] Re: [PATCH 3/6] x86: kaslr: return location from decompress_kernel

On Fri, 26 Apr 2013, H. Peter Anvin wrote:

> > + noaslr [X86]
> > + Disable kernel base offset ASLR (Address Space
> > + Layout Randomization) if built into the kernel.
> > +
> > noautogroup Disable scheduler automatic task group creation.
> >
> > nobats [PPC] Do not use BATs for mapping kernel lowmem
>
> Calling it "nokaslr" might be useful to note that it is specifically
> about the kernel.
>

Agreed.


--
James Morris
<[email protected]>

2013-04-29 17:43:17

by Kees Cook

Subject: Re: [kernel-hardening] Re: [PATCH 3/6] x86: kaslr: return location from decompress_kernel

On Sun, Apr 28, 2013 at 6:25 PM, James Morris <[email protected]> wrote:
> On Fri, 26 Apr 2013, H. Peter Anvin wrote:
>
>> > + noaslr [X86]
>> > + Disable kernel base offset ASLR (Address Space
>> > + Layout Randomization) if built into the kernel.
>> > +
>> > noautogroup Disable scheduler automatic task group creation.
>> >
>> > nobats [PPC] Do not use BATs for mapping kernel lowmem
>>
>> Calling it "nokaslr" might be useful to note that it is specifically
>> about the kernel.
>
> Agreed.

Yeah, good call. I'll get this changed. Thanks!

-Kees

--
Kees Cook
Chrome OS Security

2013-04-29 17:49:45

by Kees Cook

Subject: Re: [PATCH 2/6] x86: kaslr: move CPU flags out of cpucheck

On Fri, Apr 26, 2013 at 3:14 PM, H. Peter Anvin <[email protected]> wrote:
> On 04/26/2013 02:47 PM, H. Peter Anvin wrote:
>> On 04/26/2013 12:03 PM, Kees Cook wrote:
>>> +
>>> +static inline void cpuid(u32 id, u32 *a, u32 *b, u32 *c, u32 *d)
>>> +{
>>> + /* Handle x86_32 PIC using ebx. */
>>> + asm volatile("movl %%ebx, %%edi \n\t"
>>> + "cpuid \n\t"
>>> + "xchgl %%edi, %%ebx\n\t"
>>> + : "=a" (*a),
>>> + "=D" (*b),
>>> + "=c" (*c),
>>> + "=d" (*d)
>>> + : "a" (id)
>>> + );
>>> +}
>>
>> Please don't constrain registers unnecessarily.
>>
>> You can use "=r" there and let gcc assign whatever free register it pleases.
>>
>> You can also limit that to only:
>>
>> #if defined(__i386__) && defined(__PIC__)
>>
>
> How is this for a "beauty":
>
>
> #if defined(__i386__) && defined (__PIC__)
> # define EBX_REG "=r"
> #else
> # define EBX_REG "=b"
> #endif
>
> asm volatile(".ifnc %%ebx,%3 ; movl %%ebx,%3 ; .endif ; "
> "cpuid ; "
> ".ifnc %%ebx,%3 ; xchgl %%ebx,%3 ; .endif"
> : "=a" (*a), "=c" (*c), "=d" (*d),
> EBX_REG (*b)
> : "a" (leaf), "c" (subleaf));
>

Oh, very nice on the ifnc and register define! Is the leaf/subleaf
stuff needed there? That piece doesn't make sense to me.

-Kees

--
Kees Cook
Chrome OS Security

2013-04-29 17:53:01

by Kees Cook

Subject: Re: [PATCH 2/6] x86: kaslr: move CPU flags out of cpucheck

On Mon, Apr 29, 2013 at 10:49 AM, Kees Cook <[email protected]> wrote:
> On Fri, Apr 26, 2013 at 3:14 PM, H. Peter Anvin <[email protected]> wrote:
>> On 04/26/2013 02:47 PM, H. Peter Anvin wrote:
>>> On 04/26/2013 12:03 PM, Kees Cook wrote:
>>>> +
>>>> +static inline void cpuid(u32 id, u32 *a, u32 *b, u32 *c, u32 *d)
>>>> +{
>>>> + /* Handle x86_32 PIC using ebx. */
>>>> + asm volatile("movl %%ebx, %%edi \n\t"
>>>> + "cpuid \n\t"
>>>> + "xchgl %%edi, %%ebx\n\t"
>>>> + : "=a" (*a),
>>>> + "=D" (*b),
>>>> + "=c" (*c),
>>>> + "=d" (*d)
>>>> + : "a" (id)
>>>> + );
>>>> +}
>>>
>>> Please don't constrain registers unnecessarily.
>>>
>>> You can use "=r" there and let gcc assign whatever free register it pleases.
>>>
>>> You can also limit that to only:
>>>
>>> #if defined(__i386__) && defined(__PIC__)
>>>
>>
>> How is this for a "beauty":
>>
>>
>> #if defined(__i386__) && defined (__PIC__)
>> # define EBX_REG "=r"
>> #else
>> # define EBX_REG "=b"
>> #endif
>>
>> asm volatile(".ifnc %%ebx,%3 ; movl %%ebx,%3 ; .endif ; "
>> "cpuid ; "
>> ".ifnc %%ebx,%3 ; xchgl %%ebx,%3 ; .endif"
>> : "=a" (*a), "=c" (*c), "=d" (*d),
>> EBX_REG (*b)
>> : "a" (leaf), "c" (subleaf));
>>
>
> Oh, very nice on the ifnc and register define! Is the leaf/subleaf
> stuff needed there? That piece doesn't make sense to me.

Ah, nevermind, I just need the "leaf" bit for the cpuid input. Thanks!

-Kees

--
Kees Cook
Chrome OS Security

2013-04-29 19:15:57

by Kees Cook

Subject: Re: [PATCH 4/6] x86: kaslr: select random base offset

On Fri, Apr 26, 2013 at 2:50 PM, H. Peter Anvin <[email protected]> wrote:
> On 04/26/2013 12:03 PM, Kees Cook wrote:
>> +
>> +static unsigned long get_random_long(void)
>> +{
>> + if (has_cpuflag(X86_FEATURE_RDRAND)) {
>> + unsigned long random;
>> +
>> + debug_putstr("KASLR using RDRAND...\n");
>> + if (rdrand(&random))
>> + return random;
>> + }
>> +
>> + if (has_cpuflag(X86_FEATURE_TSC)) {
>> + uint32_t raw;
>> + unsigned long timer;
>> +
>> + debug_putstr("KASLR using RDTSC...\n");
>> + rdtscl(raw);
>> +
>> + /* Repeat the low bits of rdtsc. */
>> + timer = raw & 0xffff;
>> + timer |= (timer << 16);
>> +#ifdef CONFIG_X86_64
>> + timer |= (timer << 32) | (timer << 48);
>> +#endif
>> +
>
> This seems like a very odd thing to do. If you want to scramble bits,
> it would make more sense to do a multiply -- or much better, a
> *circular* multiply -- with a large constant.

Well, my thought here was that the entropy is only being used in a
very narrow band (since it's truncated by alignment on the low end and
physical memory on the high end), so I didn't want to "dilute" the
already bad entropy any more. Instead, I just repeated it. If a
circular multiply would serve the same purpose, I can do that. Do you
have any examples of that?
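
Something that folds the high half of a double-width product back into
the low half, maybe? A rough sketch of what I'd imagine (assumes a
64-bit build with GCC's __uint128_t; the large odd constant is
arbitrary):

        static unsigned long circular_multiply(unsigned long v)
        {
                /* The XOR wraps the bits that overflow the low word
                 * back around, so any input bit can affect any
                 * output bit. */
                __uint128_t prod = (__uint128_t)v * 0x9e3779b97f4a7c15UL;

                return (unsigned long)prod ^ (unsigned long)(prod >> 64);
        }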

>
>> + return timer;
>> + }
>> +
>> + debug_putstr("KASLR found no entropy source...\n");
>> + return 0;
>> +}
>>
>
> It might be safe to assume that anything old enough to lack RDTSC
> (basically a 486) will have an 8254, so you could fall back to reading
> the 8254 counter register.

Ah, good idea. I've added this now, which seems to work if I force the
TSC check to fail:

#define I8254_PORT_CONTROL 0x43
#define I8254_PORT_COUNTER0 0x40
#define I8254_CMD_READBACK 0xC0
#define I8254_SELECT_COUNTER0 0x02
#define I8254_STATUS_NOTREADY 0x40
static inline u16 i8254(void)
{
        u16 status, timer;

        do {
                outb(I8254_PORT_CONTROL,
                     I8254_CMD_READBACK | I8254_SELECT_COUNTER0);
                status = inb(I8254_PORT_COUNTER0);
                timer = inb(I8254_PORT_COUNTER0);
                timer |= inb(I8254_PORT_COUNTER0) << 8;
        } while (status & I8254_STATUS_NOTREADY);

        return timer;
}

Thanks,

-Kees

--
Kees Cook
Chrome OS Security