2013-10-03 20:53:54

by Kees Cook

Subject: [PATCH v7 0/7] Kernel base address randomization on x86

Updated from v6 with some suggestions from HPA.

As before, this series detects and uses the available randomness sources
(RDRAND, RDTSC, or the i8254 timer) to choose a slot from the e820 regions
for kernel base address relocation.

This has been rolled out on Chrome OS devices and is working well.

-Kees


2013-10-03 20:53:53

by Kees Cook

Subject: [PATCH 4/7] x86, kaslr: select random base offset

Select a random location when CONFIG_RANDOMIZE_BASE is used, bounded
by CONFIG_RANDOMIZE_BASE_MAX_OFFSET. Sources of randomness currently
include RDRAND, RDTSC, or the i8254.

This moves the pre-alternatives inline rdrand function into the header so
both pieces of code can use it. Availability of RDRAND is then controlled
by CONFIG_ARCH_RANDOM, if someone wants to disable it even for kASLR.

Signed-off-by: Kees Cook <[email protected]>
---
v7:
- move rdrand_long into header to avoid code duplication; HPA.
v3:
- fall back to reading the i8254 when no TSC; HPA.
v2:
- use rdtscl from msr.h; Mathias Krause.
---
arch/x86/boot/compressed/aslr.c | 70 ++++++++++++++++++++++++++++++++++++-
arch/x86/boot/compressed/misc.h | 2 ++
arch/x86/include/asm/archrandom.h | 21 +++++++++++
arch/x86/kernel/cpu/rdrand.c | 14 --------
4 files changed, 92 insertions(+), 15 deletions(-)

diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
index ed7e9f0..5bb7c63 100644
--- a/arch/x86/boot/compressed/aslr.c
+++ b/arch/x86/boot/compressed/aslr.c
@@ -1,6 +1,59 @@
#include "misc.h"

#ifdef CONFIG_RANDOMIZE_BASE
+#include <asm/msr.h>
+#include <asm/archrandom.h>
+
+#define I8254_PORT_CONTROL 0x43
+#define I8254_PORT_COUNTER0 0x40
+#define I8254_CMD_READBACK 0xC0
+#define I8254_SELECT_COUNTER0 0x02
+#define I8254_STATUS_NOTREADY 0x40
+static inline u16 i8254(void)
+{
+ u16 status, timer;
+
+ do {
+ outb(I8254_PORT_CONTROL,
+ I8254_CMD_READBACK | I8254_SELECT_COUNTER0);
+ status = inb(I8254_PORT_COUNTER0);
+ timer = inb(I8254_PORT_COUNTER0);
+ timer |= inb(I8254_PORT_COUNTER0) << 8;
+ } while (status & I8254_STATUS_NOTREADY);
+
+ return timer;
+}
+
+static unsigned long get_random_long(void)
+{
+ unsigned long random;
+
+ if (has_cpuflag(X86_FEATURE_RDRAND)) {
+ debug_putstr("KASLR using RDRAND...\n");
+ if (rdrand_long(&random))
+ return random;
+ }
+
+ if (has_cpuflag(X86_FEATURE_TSC)) {
+ uint32_t raw;
+
+ debug_putstr("KASLR using RDTSC...\n");
+ rdtscl(raw);
+
+ /* Only use the low bits of rdtsc. */
+ random = raw & 0xffff;
+ } else {
+ debug_putstr("KASLR using i8254...\n");
+ random = i8254();
+ }
+
+ /* Extend timer bits poorly... */
+ random |= (random << 16);
+#ifdef CONFIG_X86_64
+ random |= (random << 32);
+#endif
+ return random;
+}

static unsigned long find_minimum_location(unsigned long input,
unsigned long input_size,
@@ -55,6 +108,7 @@ unsigned char *choose_kernel_location(unsigned char *input,
unsigned long output_size)
{
unsigned long choice = (unsigned long)output;
+ unsigned long random;

if (cmdline_find_option_bool("nokaslr")) {
debug_putstr("KASLR disabled...\n");
@@ -64,8 +118,22 @@ unsigned char *choose_kernel_location(unsigned char *input,
choice = find_minimum_location((unsigned long)input, input_size,
(unsigned long)output, output_size);

- /* XXX: choose random location. */
+ /* XXX: Find an appropriate E820 hole, instead of adding offset. */
+ random = get_random_long();
+
+ /* Clip off top of the range. */
+ random &= (CONFIG_RANDOMIZE_BASE_MAX_OFFSET - 1);
+ while (random + output_size > CONFIG_RANDOMIZE_BASE_MAX_OFFSET)
+ random >>= 1;
+
+ /* Clip off bottom of range. */
+ random &= ~(choice - 1);
+
+ /* Always enforce the minimum. */
+ if (random < choice)
+ goto out;

+ choice = random;
out:
return (unsigned char *)choice;
}
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 42f71bb..24e3e56 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -60,6 +60,8 @@ unsigned char *choose_kernel_location(unsigned char *input,
unsigned long input_size,
unsigned char *output,
unsigned long output_size);
+/* cpuflags.c */
+bool has_cpuflag(int flag);
#else
static inline
unsigned char *choose_kernel_location(unsigned char *input,
diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h
index 0d9ec77..e6a9245 100644
--- a/arch/x86/include/asm/archrandom.h
+++ b/arch/x86/include/asm/archrandom.h
@@ -39,6 +39,20 @@

#ifdef CONFIG_ARCH_RANDOM

+/* Instead of arch_get_random_long() when alternatives haven't run. */
+static inline int rdrand_long(unsigned long *v)
+{
+ int ok;
+ asm volatile("1: " RDRAND_LONG "\n\t"
+ "jc 2f\n\t"
+ "decl %0\n\t"
+ "jnz 1b\n\t"
+ "2:"
+ : "=r" (ok), "=a" (*v)
+ : "0" (RDRAND_RETRY_LOOPS));
+ return ok;
+}
+
#define GET_RANDOM(name, type, rdrand, nop) \
static inline int name(type *v) \
{ \
@@ -68,6 +82,13 @@ GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP3);

#endif /* CONFIG_X86_64 */

+#else
+
+static inline int rdrand_long(unsigned long *v)
+{
+ return 0;
+}
+
#endif /* CONFIG_ARCH_RANDOM */

extern void x86_init_rdrand(struct cpuinfo_x86 *c);
diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c
index 88db010..384df51 100644
--- a/arch/x86/kernel/cpu/rdrand.c
+++ b/arch/x86/kernel/cpu/rdrand.c
@@ -31,20 +31,6 @@ static int __init x86_rdrand_setup(char *s)
}
__setup("nordrand", x86_rdrand_setup);

-/* We can't use arch_get_random_long() here since alternatives haven't run */
-static inline int rdrand_long(unsigned long *v)
-{
- int ok;
- asm volatile("1: " RDRAND_LONG "\n\t"
- "jc 2f\n\t"
- "decl %0\n\t"
- "jnz 1b\n\t"
- "2:"
- : "=r" (ok), "=a" (*v)
- : "0" (RDRAND_RETRY_LOOPS));
- return ok;
-}
-
/*
* Force a reseed cycle; we are architecturally guaranteed a reseed
* after no more than 512 128-bit chunks of random data. This also
--
1.7.9.5
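
For illustration, the offset clipping in choose_kernel_location() above can
be traced with a standalone sketch. The entropy value and sizes below are
made up, and the bottom-clip mask only behaves as an alignment when the
minimum location is a power of two, as assumed here:

#include <stdio.h>

#define MAX_OFFSET 0x20000000UL /* example CONFIG_RANDOMIZE_BASE_MAX_OFFSET */

int main(void)
{
	unsigned long random = 0x9f3c2a17UL;     /* pretend entropy */
	unsigned long output_size = 0x1400000UL; /* ~20 MiB decompressed image */
	unsigned long choice = 0x1000000UL;      /* minimum safe location, 16 MiB */

	/* Clip off top of the range. */
	random &= MAX_OFFSET - 1;
	while (random + output_size > MAX_OFFSET)
		random >>= 1;

	/* Clip off bottom of range (mask works because choice is 2^n here). */
	random &= ~(choice - 1);

	printf("offset %#lx: %s\n", random,
	       random < choice ? "below minimum, keep choice" : "use random");
	return 0;
}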

2013-10-03 20:53:55

by Kees Cook

Subject: [PATCH 6/7] x86, kaslr: report kernel offset on panic

When the system panics, include the kernel offset in the report to assist
in debugging.

Signed-off-by: Kees Cook <[email protected]>
---
arch/x86/kernel/setup.c | 26 ++++++++++++++++++++++++++
1 file changed, 26 insertions(+)

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index f0de629..1708862 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -824,6 +824,20 @@ static void __init trim_low_memory_range(void)
}

/*
+ * Dump out kernel offset information on panic.
+ */
+static int
+dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p)
+{
+ pr_emerg("Kernel Offset: 0x%lx from 0x%lx "
+ "(relocation range: 0x%lx-0x%lx)\n",
+ (unsigned long)&_text - __START_KERNEL, __START_KERNEL,
+ __START_KERNEL_map, MODULES_VADDR-1);
+
+ return 0;
+}
+
+/*
* Determine if we were loaded by an EFI loader. If so, then we have also been
* passed the efi memmap, systab, etc., so we should use these data structures
* for initialization. Note, the efi init code path is determined by the
@@ -1242,3 +1256,15 @@ void __init i386_reserve_resources(void)
}

#endif /* CONFIG_X86_32 */
+
+static struct notifier_block kernel_offset_notifier = {
+ .notifier_call = dump_kernel_offset
+};
+
+static int __init register_kernel_offset_dumper(void)
+{
+ atomic_notifier_chain_register(&panic_notifier_list,
+ &kernel_offset_notifier);
+ return 0;
+}
+__initcall(register_kernel_offset_dumper);
--
1.7.9.5
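
The same panic-notifier pattern can be exercised from a loadable module;
this is a minimal sketch with made-up names, not part of the patch:

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/notifier.h>

/* Callback invoked from the panic notifier chain. */
static int demo_panic_cb(struct notifier_block *self, unsigned long event,
			 void *data)
{
	pr_emerg("demo panic notifier fired\n");
	return NOTIFY_DONE;
}

static struct notifier_block demo_panic_nb = {
	.notifier_call = demo_panic_cb,
};

static int __init demo_init(void)
{
	atomic_notifier_chain_register(&panic_notifier_list, &demo_panic_nb);
	return 0;
}

static void __exit demo_exit(void)
{
	atomic_notifier_chain_unregister(&panic_notifier_list, &demo_panic_nb);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");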

2013-10-03 20:54:05

by Kees Cook

Subject: [PATCH 1/7] x86, boot: move CPU flags out of cpucheck

Refactor the CPU flags handling out of the cpucheck routines so that
they can be reused by the future ASLR routines (in order to detect CPU
features like RDRAND and RDTSC).

This reworks has_eflag() and has_fpu() to be used on both 32-bit and
64-bit, and refactors the calls to cpuid to make them PIC-safe on 32-bit.

Signed-off-by: Kees Cook <[email protected]>

---
v7:
- renamed patch to "boot" instead of "kaslr"; HPA.
v3:
- do not constrain registers in cpuid call; HPA.
v2:
- clean up has_eflags and has_fpu to be 64-bit sane; HPA.
---
arch/x86/boot/Makefile | 2 +-
arch/x86/boot/boot.h | 10 +---
arch/x86/boot/compressed/Makefile | 2 +-
arch/x86/boot/compressed/cpuflags.c | 12 ++++
arch/x86/boot/cpucheck.c | 86 -----------------------------
arch/x86/boot/cpuflags.c | 104 +++++++++++++++++++++++++++++++++++
arch/x86/boot/cpuflags.h | 19 +++++++
7 files changed, 138 insertions(+), 97 deletions(-)
create mode 100644 arch/x86/boot/compressed/cpuflags.c
create mode 100644 arch/x86/boot/cpuflags.c
create mode 100644 arch/x86/boot/cpuflags.h

diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 379814b..0da2e37 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -20,7 +20,7 @@ targets := vmlinux.bin setup.bin setup.elf bzImage
targets += fdimage fdimage144 fdimage288 image.iso mtools.conf
subdir- := compressed

-setup-y += a20.o bioscall.o cmdline.o copy.o cpu.o cpucheck.o
+setup-y += a20.o bioscall.o cmdline.o copy.o cpu.o cpuflags.o cpucheck.o
setup-y += early_serial_console.o edd.o header.o main.o mca.o memory.o
setup-y += pm.o pmjump.o printf.o regs.o string.o tty.o video.o
setup-y += video-mode.o version.o
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index ef72bae..50f8c5e 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -26,9 +26,8 @@
#include <asm/boot.h>
#include <asm/setup.h>
#include "bitops.h"
-#include <asm/cpufeature.h>
-#include <asm/processor-flags.h>
#include "ctype.h"
+#include "cpuflags.h"

/* Useful macros */
#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
@@ -307,14 +306,7 @@ static inline int cmdline_find_option_bool(const char *option)
return __cmdline_find_option_bool(cmd_line_ptr, option);
}

-
/* cpu.c, cpucheck.c */
-struct cpu_features {
- int level; /* Family, or 64 for x86-64 */
- int model;
- u32 flags[NCAPINTS];
-};
-extern struct cpu_features cpu;
int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr);
int validate_cpu(void);

diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index dcd90df..3312f1b 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -27,7 +27,7 @@ HOST_EXTRACFLAGS += -I$(srctree)/tools/include

VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
$(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \
- $(obj)/piggy.o
+ $(obj)/piggy.o $(obj)/cpuflags.o

$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone

diff --git a/arch/x86/boot/compressed/cpuflags.c b/arch/x86/boot/compressed/cpuflags.c
new file mode 100644
index 0000000..931cba6
--- /dev/null
+++ b/arch/x86/boot/compressed/cpuflags.c
@@ -0,0 +1,12 @@
+#ifdef CONFIG_RANDOMIZE_BASE
+
+#include "../cpuflags.c"
+
+bool has_cpuflag(int flag)
+{
+ get_flags();
+
+ return test_bit(flag, cpu.flags);
+}
+
+#endif
diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c
index 4d3ff03..e1f3c16 100644
--- a/arch/x86/boot/cpucheck.c
+++ b/arch/x86/boot/cpucheck.c
@@ -28,8 +28,6 @@
#include <asm/required-features.h>
#include <asm/msr-index.h>

-struct cpu_features cpu;
-static u32 cpu_vendor[3];
static u32 err_flags[NCAPINTS];

static const int req_level = CONFIG_X86_MINIMUM_CPU_FAMILY;
@@ -69,90 +67,6 @@ static int is_transmeta(void)
cpu_vendor[2] == A32('M', 'x', '8', '6');
}

-static int has_fpu(void)
-{
- u16 fcw = -1, fsw = -1;
- u32 cr0;
-
- asm("movl %%cr0,%0" : "=r" (cr0));
- if (cr0 & (X86_CR0_EM|X86_CR0_TS)) {
- cr0 &= ~(X86_CR0_EM|X86_CR0_TS);
- asm volatile("movl %0,%%cr0" : : "r" (cr0));
- }
-
- asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
- : "+m" (fsw), "+m" (fcw));
-
- return fsw == 0 && (fcw & 0x103f) == 0x003f;
-}
-
-static int has_eflag(u32 mask)
-{
- u32 f0, f1;
-
- asm("pushfl ; "
- "pushfl ; "
- "popl %0 ; "
- "movl %0,%1 ; "
- "xorl %2,%1 ; "
- "pushl %1 ; "
- "popfl ; "
- "pushfl ; "
- "popl %1 ; "
- "popfl"
- : "=&r" (f0), "=&r" (f1)
- : "ri" (mask));
-
- return !!((f0^f1) & mask);
-}
-
-static void get_flags(void)
-{
- u32 max_intel_level, max_amd_level;
- u32 tfms;
-
- if (has_fpu())
- set_bit(X86_FEATURE_FPU, cpu.flags);
-
- if (has_eflag(X86_EFLAGS_ID)) {
- asm("cpuid"
- : "=a" (max_intel_level),
- "=b" (cpu_vendor[0]),
- "=d" (cpu_vendor[1]),
- "=c" (cpu_vendor[2])
- : "a" (0));
-
- if (max_intel_level >= 0x00000001 &&
- max_intel_level <= 0x0000ffff) {
- asm("cpuid"
- : "=a" (tfms),
- "=c" (cpu.flags[4]),
- "=d" (cpu.flags[0])
- : "a" (0x00000001)
- : "ebx");
- cpu.level = (tfms >> 8) & 15;
- cpu.model = (tfms >> 4) & 15;
- if (cpu.level >= 6)
- cpu.model += ((tfms >> 16) & 0xf) << 4;
- }
-
- asm("cpuid"
- : "=a" (max_amd_level)
- : "a" (0x80000000)
- : "ebx", "ecx", "edx");
-
- if (max_amd_level >= 0x80000001 &&
- max_amd_level <= 0x8000ffff) {
- u32 eax = 0x80000001;
- asm("cpuid"
- : "+a" (eax),
- "=c" (cpu.flags[6]),
- "=d" (cpu.flags[1])
- : : "ebx");
- }
- }
-}
-
/* Returns a bitmask of which words we have error bits in */
static int check_flags(void)
{
diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c
new file mode 100644
index 0000000..b02544a
--- /dev/null
+++ b/arch/x86/boot/cpuflags.c
@@ -0,0 +1,104 @@
+#include <linux/types.h>
+#include "bitops.h"
+
+#include <asm/processor-flags.h>
+#include <asm/required-features.h>
+#include <asm/msr-index.h>
+#include "cpuflags.h"
+
+struct cpu_features cpu;
+u32 cpu_vendor[3];
+
+static bool loaded_flags;
+
+static int has_fpu(void)
+{
+ u16 fcw = -1, fsw = -1;
+ unsigned long cr0;
+
+ asm volatile("mov %%cr0,%0" : "=r" (cr0));
+ if (cr0 & (X86_CR0_EM|X86_CR0_TS)) {
+ cr0 &= ~(X86_CR0_EM|X86_CR0_TS);
+ asm volatile("mov %0,%%cr0" : : "r" (cr0));
+ }
+
+ asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
+ : "+m" (fsw), "+m" (fcw));
+
+ return fsw == 0 && (fcw & 0x103f) == 0x003f;
+}
+
+int has_eflag(unsigned long mask)
+{
+ unsigned long f0, f1;
+
+ asm volatile("pushf \n\t"
+ "pushf \n\t"
+ "pop %0 \n\t"
+ "mov %0,%1 \n\t"
+ "xor %2,%1 \n\t"
+ "push %1 \n\t"
+ "popf \n\t"
+ "pushf \n\t"
+ "pop %1 \n\t"
+ "popf"
+ : "=&r" (f0), "=&r" (f1)
+ : "ri" (mask));
+
+ return !!((f0^f1) & mask);
+}
+
+/* Handle x86_32 PIC using ebx. */
+#if defined(__i386__) && defined(__PIC__)
+# define EBX_REG "=r"
+#else
+# define EBX_REG "=b"
+#endif
+
+static inline void cpuid(u32 id, u32 *a, u32 *b, u32 *c, u32 *d)
+{
+ asm volatile(".ifnc %%ebx,%3 ; movl %%ebx,%3 ; .endif \n\t"
+ "cpuid \n\t"
+ ".ifnc %%ebx,%3 ; xchgl %%ebx,%3 ; .endif \n\t"
+ : "=a" (*a), "=c" (*c), "=d" (*d), EBX_REG (*b)
+ : "a" (id)
+ );
+}
+
+void get_flags(void)
+{
+ u32 max_intel_level, max_amd_level;
+ u32 tfms;
+ u32 ignored;
+
+ if (loaded_flags)
+ return;
+ loaded_flags = true;
+
+ if (has_fpu())
+ set_bit(X86_FEATURE_FPU, cpu.flags);
+
+ if (has_eflag(X86_EFLAGS_ID)) {
+ cpuid(0x0, &max_intel_level, &cpu_vendor[0], &cpu_vendor[2],
+ &cpu_vendor[1]);
+
+ if (max_intel_level >= 0x00000001 &&
+ max_intel_level <= 0x0000ffff) {
+ cpuid(0x1, &tfms, &ignored, &cpu.flags[4],
+ &cpu.flags[0]);
+ cpu.level = (tfms >> 8) & 15;
+ cpu.model = (tfms >> 4) & 15;
+ if (cpu.level >= 6)
+ cpu.model += ((tfms >> 16) & 0xf) << 4;
+ }
+
+ cpuid(0x80000000, &max_amd_level, &ignored, &ignored,
+ &ignored);
+
+ if (max_amd_level >= 0x80000001 &&
+ max_amd_level <= 0x8000ffff) {
+ cpuid(0x80000001, &ignored, &ignored, &cpu.flags[6],
+ &cpu.flags[1]);
+ }
+ }
+}
diff --git a/arch/x86/boot/cpuflags.h b/arch/x86/boot/cpuflags.h
new file mode 100644
index 0000000..9bb4e25
--- /dev/null
+++ b/arch/x86/boot/cpuflags.h
@@ -0,0 +1,19 @@
+#ifndef BOOT_CPUFLAGS_H
+#define BOOT_CPUFLAGS_H
+
+#include <asm/cpufeature.h>
+#include <asm/processor-flags.h>
+
+struct cpu_features {
+ int level; /* Family, or 64 for x86-64 */
+ int model;
+ u32 flags[NCAPINTS];
+};
+
+extern struct cpu_features cpu;
+extern u32 cpu_vendor[3];
+
+int has_eflag(unsigned long mask);
+void get_flags(void);
+
+#endif
--
1.7.9.5
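
As a quick check of the family/model decoding in get_flags() above, here is
a standalone sketch; the tfms value is a hypothetical CPUID leaf 1 EAX:

#include <stdio.h>

int main(void)
{
	unsigned int tfms = 0x000306a9; /* example CPUID.1 EAX value */
	int level = (tfms >> 8) & 15;   /* base family */
	int model = (tfms >> 4) & 15;   /* base model */

	if (level >= 6)
		model += ((tfms >> 16) & 0xf) << 4; /* fold in extended model */

	/* Prints: family 6, model 0x3a */
	printf("family %d, model 0x%x\n", level, model);
	return 0;
}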

2013-10-03 20:54:08

by Kees Cook

Subject: [PATCH 3/7] x86, kaslr: find minimum safe relocation position

Examine all the known unsafe areas and avoid them by just raising the
minimum relocation position to be past them.

Signed-off-by: Kees Cook <[email protected]>
---
arch/x86/boot/compressed/aslr.c | 50 +++++++++++++++++++++++++++++++++++++++
arch/x86/boot/compressed/misc.c | 10 ++------
arch/x86/boot/compressed/misc.h | 8 +++++++
3 files changed, 60 insertions(+), 8 deletions(-)

diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
index b73cc66..ed7e9f0 100644
--- a/arch/x86/boot/compressed/aslr.c
+++ b/arch/x86/boot/compressed/aslr.c
@@ -2,6 +2,53 @@

#ifdef CONFIG_RANDOMIZE_BASE

+static unsigned long find_minimum_location(unsigned long input,
+ unsigned long input_size,
+ unsigned long output,
+ unsigned long output_size)
+{
+ u64 initrd_start, initrd_size;
+ u64 cmd_line, cmd_line_size;
+ unsigned long unsafe, unsafe_len;
+ char *ptr;
+
+ /*
+ * Mark off the region that is unsafe to overlap during
+ * decompression (see calculations at top of misc.c).
+ */
+ unsafe_len = (output_size >> 12) + 32768 + 18;
+ unsafe = (unsigned long)input + input_size - unsafe_len;
+
+ /*
+ * Locate other regions that cannot be over-written during
+ * decompression: initrd, cmd_line.
+ */
+ initrd_start = (u64)real_mode->ext_ramdisk_image << 32;
+ initrd_start |= real_mode->hdr.ramdisk_image;
+ initrd_size = (u64)real_mode->ext_ramdisk_size << 32;
+ initrd_size |= real_mode->hdr.ramdisk_size;
+ cmd_line = (u64)real_mode->ext_cmd_line_ptr << 32;
+ cmd_line |= real_mode->hdr.cmd_line_ptr;
+ /* Calculate size of cmd_line. */
+ ptr = (char *)(unsigned long)cmd_line;
+ for (cmd_line_size = 0; ptr[cmd_line_size++]; )
+ ;
+
+ /* Minimum location must be above all these regions: */
+ output = max(output, unsafe + unsafe_len);
+ output = max(output, (unsigned long)free_mem_ptr + BOOT_HEAP_SIZE);
+ output = max(output, (unsigned long)free_mem_end_ptr + BOOT_STACK_SIZE);
+ output = max(output, (unsigned long)initrd_start
+ + (unsigned long)initrd_size);
+ output = max(output, (unsigned long)cmd_line
+ + (unsigned long)cmd_line_size);
+
+ /* Make sure the location is still aligned. */
+ output = ALIGN(output, CONFIG_PHYSICAL_ALIGN);
+
+ return output;
+}
+
unsigned char *choose_kernel_location(unsigned char *input,
unsigned long input_size,
unsigned char *output,
@@ -14,6 +61,9 @@ unsigned char *choose_kernel_location(unsigned char *input,
goto out;
}

+ choice = find_minimum_location((unsigned long)input, input_size,
+ (unsigned long)output, output_size);
+
/* XXX: choose random location. */

out:
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 7138768..196eaf3 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -112,14 +112,8 @@ struct boot_params *real_mode; /* Pointer to real-mode data */
void *memset(void *s, int c, size_t n);
void *memcpy(void *dest, const void *src, size_t n);

-#ifdef CONFIG_X86_64
-#define memptr long
-#else
-#define memptr unsigned
-#endif
-
-static memptr free_mem_ptr;
-static memptr free_mem_end_ptr;
+memptr free_mem_ptr;
+memptr free_mem_end_ptr;

static char *vidmem;
static int vidport;
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 9077af7..42f71bb 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -23,7 +23,15 @@
#define BOOT_BOOT_H
#include "../ctype.h"

+#ifdef CONFIG_X86_64
+#define memptr long
+#else
+#define memptr unsigned
+#endif
+
/* misc.c */
+extern memptr free_mem_ptr;
+extern memptr free_mem_end_ptr;
extern struct boot_params *real_mode; /* Pointer to real-mode data */
void __putstr(const char *s);
#define error_putstr(__x) __putstr(__x)
--
1.7.9.5
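
The safety margin in find_minimum_location() mirrors the worst-case
in-place decompression overhead calculated at the top of misc.c; a
standalone sketch with made-up sizes:

#include <stdio.h>

int main(void)
{
	unsigned long input = 0x100000;        /* compressed image at 1 MiB */
	unsigned long input_size = 0x600000;   /* 6 MiB compressed */
	unsigned long output_size = 0x1400000; /* 20 MiB decompressed */

	/* Worst-case expansion margin (see calculations in misc.c). */
	unsigned long unsafe_len = (output_size >> 12) + 32768 + 18;
	unsigned long unsafe = input + input_size - unsafe_len;

	printf("margin %lu bytes; minimum output must be >= %#lx\n",
	       unsafe_len, unsafe + unsafe_len);
	return 0;
}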

2013-10-03 20:54:55

by Kees Cook

Subject: [PATCH 7/7] x86, kaslr: raise max positions to 1GiB on x86_64

On 64-bit, this raises the maximum location to 1GiB, the upper limit
currently, since the kernel fixmap page mappings need to be moved to
use the other 1GiB (which would be the theoretical limit when building
with -mcmodel=kernel).

Signed-off-by: Kees Cook <[email protected]>
---
arch/x86/Kconfig | 16 +++++++++++++---
arch/x86/include/asm/page_64_types.h | 15 ++++++++++++---
arch/x86/include/asm/pgtable_64_types.h | 2 +-
arch/x86/mm/init_32.c | 3 +++
4 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 992701d..51f4399 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1746,13 +1746,23 @@ config RANDOMIZE_BASE
config RANDOMIZE_BASE_MAX_OFFSET
hex "Maximum ASLR offset allowed"
depends on RANDOMIZE_BASE
- default "0x10000000"
- range 0x0 0x10000000
+ range 0x0 0x20000000 if X86_32
+ default "0x20000000" if X86_32
+ range 0x0 0x40000000 if X86_64
+ default "0x40000000" if X86_64
---help---
Determines the maximal offset in bytes that will be applied to the
kernel when Address Space Layout Randomization (ASLR) is active.
Must be less than or equal to the actual physical memory on the
- system. This must be a power of two.
+ system. This must be a multiple of CONFIG_PHYSICAL_ALIGN.
+
+ On 32-bit this is limited to 512MiB.
+
+ On 64-bit this is limited by how the kernel fixmap page table is
+ positioned, so this cannot be larger than 1GiB currently. Normally
+ there is a 512MiB to 1.5GiB split between kernel and modules. When
+ this is raised above the 512MiB default, the modules area will
+ shrink to compensate, up to the current maximum of a 1GiB/1GiB split.

# Relocation on x86 needs some additional build support
config X86_NEED_RELOCS
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 43dcd80..8de6d9c 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -39,9 +39,18 @@
#define __VIRTUAL_MASK_SHIFT 47

/*
- * Kernel image size is limited to 512 MB (see level2_kernel_pgt in
- * arch/x86/kernel/head_64.S), and it is mapped here:
+ * Kernel image size is limited to 1GiB due to the fixmap living in the
+ * next 1GiB (see level2_kernel_pgt in arch/x86/kernel/head_64.S). Use
+ * 512MiB by default, leaving 1.5GiB for modules once the page tables
+ * are fully set up. If kernel ASLR is configured, it can extend the
+ * kernel page table mapping, reducing the size of the modules area.
*/
-#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024)
+#define KERNEL_IMAGE_SIZE_DEFAULT (512 * 1024 * 1024)
+#if defined(CONFIG_RANDOMIZE_BASE) && \
+ CONFIG_RANDOMIZE_BASE_MAX_OFFSET > KERNEL_IMAGE_SIZE_DEFAULT
+#define KERNEL_IMAGE_SIZE CONFIG_RANDOMIZE_BASE_MAX_OFFSET
+#else
+#define KERNEL_IMAGE_SIZE KERNEL_IMAGE_SIZE_DEFAULT
+#endif

#endif /* _ASM_X86_PAGE_64_DEFS_H */
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 2d88344..c883bf7 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -58,7 +58,7 @@ typedef struct { pteval_t pte; } pte_t;
#define VMALLOC_START _AC(0xffffc90000000000, UL)
#define VMALLOC_END _AC(0xffffe8ffffffffff, UL)
#define VMEMMAP_START _AC(0xffffea0000000000, UL)
-#define MODULES_VADDR _AC(0xffffffffa0000000, UL)
+#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
#define MODULES_END _AC(0xffffffffff000000, UL)
#define MODULES_LEN (MODULES_END - MODULES_VADDR)

diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 4287f1f..5bdc543 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -806,6 +806,9 @@ void __init mem_init(void)
BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END);
#undef high_memory
#undef __FIXADDR_TOP
+#ifdef CONFIG_RANDOMIZE_BASE
+ BUILD_BUG_ON(CONFIG_RANDOMIZE_BASE_MAX_OFFSET > KERNEL_IMAGE_SIZE);
+#endif

#ifdef CONFIG_HIGHMEM
BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
--
1.7.9.5
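
The resulting kernel/modules split can be computed directly; a sketch using
the 64-bit layout constants quoted from the headers above:

#include <stdio.h>

#define START_KERNEL_MAP 0xffffffff80000000UL /* __START_KERNEL_map */
#define MODULES_END      0xffffffffff000000UL

static void show(const char *label, unsigned long image_size)
{
	unsigned long modules_vaddr = START_KERNEL_MAP + image_size;

	printf("%s: MODULES_VADDR %#lx, MODULES_LEN %lu MiB\n",
	       label, modules_vaddr, (MODULES_END - modules_vaddr) >> 20);
}

int main(void)
{
	show("512MiB default ", 512UL << 20); /* ~1520 MiB left for modules */
	show("1GiB max offset", 1UL << 30);   /* ~1008 MiB left for modules */
	return 0;
}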

2013-10-03 20:55:25

by Kees Cook

Subject: [PATCH 2/7] x86, kaslr: return location from decompress_kernel

This allows decompress_kernel to return a new location for the kernel to
be relocated to. Additionally, it enforces CONFIG_PHYSICAL_START as the
minimum relocation position when building with CONFIG_RELOCATABLE.

With CONFIG_RANDOMIZE_BASE set, the choose_kernel_location routine
will select a new location to decompress the kernel, though here it is
presently a no-op. The kernel command line option "nokaslr" is introduced
to bypass these routines.

Signed-off-by: Kees Cook <[email protected]>
---
v3:
- treat LOAD_PHYSICAL_ADDR as minimum.
v2:
- renamed "noaslr" to "nokaslr"; HPA.
---
Documentation/kernel-parameters.txt | 4 ++++
arch/x86/Kconfig | 38 +++++++++++++++++++++++++++++++----
arch/x86/boot/compressed/Makefile | 2 +-
arch/x86/boot/compressed/aslr.c | 23 +++++++++++++++++++++
arch/x86/boot/compressed/cmdline.c | 2 +-
arch/x86/boot/compressed/head_32.S | 10 +++++----
arch/x86/boot/compressed/head_64.S | 16 +++++++++------
arch/x86/boot/compressed/misc.c | 8 ++++++--
arch/x86/boot/compressed/misc.h | 27 +++++++++++++++++++------
9 files changed, 106 insertions(+), 24 deletions(-)
create mode 100644 arch/x86/boot/compressed/aslr.c

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index fcbb736..773fc4c 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1975,6 +1975,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
noapic [SMP,APIC] Tells the kernel to not make use of any
IOAPICs that may be present in the system.

+ nokaslr [X86]
+ Disable kernel base offset ASLR (Address Space
+ Layout Randomization) if built into the kernel.
+
noautogroup Disable scheduler automatic task group creation.

nobats [PPC] Do not use BATs for mapping kernel lowmem
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ee2fb9d..992701d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1722,16 +1722,46 @@ config RELOCATABLE

Note: If CONFIG_RELOCATABLE=y, then the kernel runs from the address
it has been loaded at and the compile time physical address
- (CONFIG_PHYSICAL_START) is ignored.
+ (CONFIG_PHYSICAL_START) is used as the minimum location.

-# Relocation on x86-32 needs some additional build support
+config RANDOMIZE_BASE
+ bool "Randomize the address of the kernel image"
+ depends on RELOCATABLE
+ depends on !HIBERNATION
+ default n
+ ---help---
+ Randomizes the physical and virtual address at which the
+ kernel image is decompressed, as a security feature that
+ deters exploit attempts relying on knowledge of the location
+ of kernel internals.
+
+ Entropy is generated using the RDRAND instruction if it
+ is supported. If not, then RDTSC is used, if supported. If
+ neither RDRAND nor RDTSC are supported, then no randomness
+ is introduced.
+
+ The kernel will be offset by up to RANDOMIZE_BASE_MAX_OFFSET,
+ and aligned according to PHYSICAL_ALIGN.
+
+config RANDOMIZE_BASE_MAX_OFFSET
+ hex "Maximum ASLR offset allowed"
+ depends on RANDOMIZE_BASE
+ default "0x10000000"
+ range 0x0 0x10000000
+ ---help---
+ Determines the maximal offset in bytes that will be applied to the
+ kernel when Address Space Layout Randomization (ASLR) is active.
+ Must be less than or equal to the actual physical memory on the
+ system. This must be a power of two.
+
+# Relocation on x86 needs some additional build support
config X86_NEED_RELOCS
def_bool y
- depends on X86_32 && RELOCATABLE
+ depends on RANDOMIZE_BASE || (X86_32 && RELOCATABLE)

config PHYSICAL_ALIGN
hex "Alignment value to which kernel should be aligned"
- default "0x1000000"
+ default "0x200000"
range 0x2000 0x1000000 if X86_32
range 0x200000 0x1000000 if X86_64
---help---
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 3312f1b..ae8b5db 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -27,7 +27,7 @@ HOST_EXTRACFLAGS += -I$(srctree)/tools/include

VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
$(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \
- $(obj)/piggy.o $(obj)/cpuflags.o
+ $(obj)/piggy.o $(obj)/cpuflags.o $(obj)/aslr.o

$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone

diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
new file mode 100644
index 0000000..b73cc66
--- /dev/null
+++ b/arch/x86/boot/compressed/aslr.c
@@ -0,0 +1,23 @@
+#include "misc.h"
+
+#ifdef CONFIG_RANDOMIZE_BASE
+
+unsigned char *choose_kernel_location(unsigned char *input,
+ unsigned long input_size,
+ unsigned char *output,
+ unsigned long output_size)
+{
+ unsigned long choice = (unsigned long)output;
+
+ if (cmdline_find_option_bool("nokaslr")) {
+ debug_putstr("KASLR disabled...\n");
+ goto out;
+ }
+
+ /* XXX: choose random location. */
+
+out:
+ return (unsigned char *)choice;
+}
+
+#endif /* CONFIG_RANDOMIZE_BASE */
diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c
index bffd73b..b68e303 100644
--- a/arch/x86/boot/compressed/cmdline.c
+++ b/arch/x86/boot/compressed/cmdline.c
@@ -1,6 +1,6 @@
#include "misc.h"

-#ifdef CONFIG_EARLY_PRINTK
+#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE

static unsigned long fs;
static inline void set_fs(unsigned long seg)
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 5d6f689..9116aac 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -117,9 +117,11 @@ preferred_addr:
addl %eax, %ebx
notl %eax
andl %eax, %ebx
-#else
- movl $LOAD_PHYSICAL_ADDR, %ebx
+ cmpl $LOAD_PHYSICAL_ADDR, %ebx
+ jge 1f
#endif
+ movl $LOAD_PHYSICAL_ADDR, %ebx
+1:

/* Target address to relocate to for decompression */
addl $z_extract_offset, %ebx
@@ -191,14 +193,14 @@ relocated:
leal boot_heap(%ebx), %eax
pushl %eax /* heap area */
pushl %esi /* real mode pointer */
- call decompress_kernel
+ call decompress_kernel /* returns kernel location in %eax */
addl $24, %esp

/*
* Jump to the decompressed kernel.
*/
xorl %ebx, %ebx
- jmp *%ebp
+ jmp *%eax

/*
* Stack and heap for uncompression
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index c337422..c5c1ae0 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -94,9 +94,11 @@ ENTRY(startup_32)
addl %eax, %ebx
notl %eax
andl %eax, %ebx
-#else
- movl $LOAD_PHYSICAL_ADDR, %ebx
+ cmpl $LOAD_PHYSICAL_ADDR, %ebx
+ jge 1f
#endif
+ movl $LOAD_PHYSICAL_ADDR, %ebx
+1:

/* Target address to relocate to for decompression */
addl $z_extract_offset, %ebx
@@ -269,9 +271,11 @@ preferred_addr:
addq %rax, %rbp
notq %rax
andq %rax, %rbp
-#else
- movq $LOAD_PHYSICAL_ADDR, %rbp
+ cmpq $LOAD_PHYSICAL_ADDR, %rbp
+ jge 1f
#endif
+ movq $LOAD_PHYSICAL_ADDR, %rbp
+1:

/* Target address to relocate to for decompression */
leaq z_extract_offset(%rbp), %rbx
@@ -339,13 +343,13 @@ relocated:
movl $z_input_len, %ecx /* input_len */
movq %rbp, %r8 /* output target address */
movq $z_output_len, %r9 /* decompressed length */
- call decompress_kernel
+ call decompress_kernel /* returns kernel location in %rax */
popq %rsi

/*
* Jump to the decompressed kernel.
*/
- jmp *%rbp
+ jmp *%rax

.code32
no_longmode:
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 434f077..7138768 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -395,7 +395,7 @@ static void parse_elf(void *output)
free(phdrs);
}

-asmlinkage void decompress_kernel(void *rmode, memptr heap,
+asmlinkage void *decompress_kernel(void *rmode, memptr heap,
unsigned char *input_data,
unsigned long input_len,
unsigned char *output,
@@ -422,6 +422,10 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
free_mem_ptr = heap; /* Heap */
free_mem_end_ptr = heap + BOOT_HEAP_SIZE;

+ output = choose_kernel_location(input_data, input_len,
+ output, output_len);
+
+ /* Validate memory location choices. */
if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1))
error("Destination address inappropriately aligned");
#ifdef CONFIG_X86_64
@@ -441,5 +445,5 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
parse_elf(output);
handle_relocations(output, output_len);
debug_putstr("done.\nBooting the kernel.\n");
- return;
+ return output;
}
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 674019d..9077af7 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -39,23 +39,38 @@ static inline void debug_putstr(const char *s)

#endif

-#ifdef CONFIG_EARLY_PRINTK
-
+#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE
/* cmdline.c */
int cmdline_find_option(const char *option, char *buffer, int bufsize);
int cmdline_find_option_bool(const char *option);
+#endif

-/* early_serial_console.c */
-extern int early_serial_base;
-void console_init(void);

+#if CONFIG_RANDOMIZE_BASE
+/* aslr.c */
+unsigned char *choose_kernel_location(unsigned char *input,
+ unsigned long input_size,
+ unsigned char *output,
+ unsigned long output_size);
#else
+static inline
+unsigned char *choose_kernel_location(unsigned char *input,
+ unsigned long input_size,
+ unsigned char *output,
+ unsigned long output_size)
+{
+ return output;
+}
+#endif

+#ifdef CONFIG_EARLY_PRINTK
/* early_serial_console.c */
+extern int early_serial_base;
+void console_init(void);
+#else
static const int early_serial_base;
static inline void console_init(void)
{ }
-
#endif

#endif
--
1.7.9.5
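
Restated in C, the head_32.S/head_64.S change above keeps the runtime load
address only when it is at or above the compile-time minimum; the addresses
and config values here are hypothetical:

#include <stdio.h>

#define LOAD_PHYSICAL_ADDR 0x1000000UL /* example CONFIG_PHYSICAL_START */
#define PHYSICAL_ALIGN     0x200000UL  /* example CONFIG_PHYSICAL_ALIGN */

int main(void)
{
	unsigned long runtime = 0x7fe00000UL; /* where the boot loader put us */
	unsigned long base;

	/* Align up, as the startup code's add/not/and sequence does. */
	base = (runtime + PHYSICAL_ALIGN - 1) & ~(PHYSICAL_ALIGN - 1);

	/* The new cmp/jge: never go below the compile-time minimum. */
	if (base < LOAD_PHYSICAL_ADDR)
		base = LOAD_PHYSICAL_ADDR;

	printf("decompress relative to %#lx\n", base);
	return 0;
}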

2013-10-03 20:55:23

by Kees Cook

Subject: [PATCH 5/7] x86, kaslr: select memory region from e820 maps

Counts the available aligned slots across all e820 entries and chooses
one randomly for the new kernel base address.

Signed-off-by: Kees Cook <[email protected]>
---
v2:
- make sure to exclude e820 regions outside the 32-bit memory range.
---
arch/x86/boot/compressed/aslr.c | 140 ++++++++++++++++++++++++++++++++++++---
1 file changed, 130 insertions(+), 10 deletions(-)

diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
index 5bb7c63..83aee8b 100644
--- a/arch/x86/boot/compressed/aslr.c
+++ b/arch/x86/boot/compressed/aslr.c
@@ -2,6 +2,7 @@

#ifdef CONFIG_RANDOMIZE_BASE
#include <asm/msr.h>
+#include <asm/e820.h>
#include <asm/archrandom.h>

#define I8254_PORT_CONTROL 0x43
@@ -102,6 +103,130 @@ static unsigned long find_minimum_location(unsigned long input,
return output;
}

+/*
+ * This routine is used to count how many aligned slots that can hold the
+ * kernel exist across all e820 entries. It is called in two phases, once
+ * to count valid memory regions available for the kernel image, and a
+ * second time to select one from those seen.
+ *
+ * It is first called with "counting" set to true, where it expects to be
+ * called once for each e820 entry. In this mode, it will update *count
+ * with how many slots are available in the given e820 entry. Once the walk
+ * across all e820 entries has finished, the caller will have a total count
+ * of all valid memory regions available for the kernel image.
+ *
+ * Once the first pass of entry walking is finished, the caller selects one
+ * of the possible slots (stored in *count), and performs a second walk,
+ * with "counting" set to false. In this mode, *count is decremented until
+ * the corresponding slot is found in a specific e820 region, at which
+ * point, the function returns that address, and the walk terminates.
+ */
+static unsigned long process_e820_entry(struct e820entry *entry, bool counting,
+ unsigned long minimum,
+ unsigned long image_size,
+ unsigned long *count)
+{
+ u64 addr, size;
+ unsigned long alignments;
+
+ /* Skip non-RAM entries. */
+ if (entry->type != E820_RAM)
+ return 0;
+
+ /* Ignore entries entirely above our maximum. */
+ if (entry->addr >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET)
+ return 0;
+
+ /* Ignore entries entirely below our minimum. */
+ if (entry->addr + entry->size < minimum)
+ return 0;
+
+ size = entry->size;
+ addr = entry->addr;
+
+ /* Potentially raise address to minimum location. */
+ if (addr < minimum)
+ addr = minimum;
+
+ /* Potentially raise address to meet alignment requirements. */
+ addr = ALIGN(addr, CONFIG_PHYSICAL_ALIGN);
+
+ /* Did we raise the address above the bounds of this e820 region? */
+ if (addr > entry->addr + entry->size)
+ return 0;
+
+ /* Reduce size by any delta from the original address. */
+ size -= addr - entry->addr;
+
+ /* Reduce maximum image starting location to maximum limit. */
+ if (addr + size > CONFIG_RANDOMIZE_BASE_MAX_OFFSET)
+ size = CONFIG_RANDOMIZE_BASE_MAX_OFFSET - addr;
+
+ /* Ignore entries that cannot hold even a single kernel image. */
+ if (size < image_size)
+ return 0;
+
+ /*
+ * Reduce size by kernel image size so we can see how many aligned
+ * starting addresses will fit without running past the end of a
+ * region. XXX: adjacent e820 regions are not detected, so up to
+ * image_size / CONFIG_PHYSICAL_ALIGN slots may go unused across
+ * adjacent regions.
+ */
+ size -= image_size;
+
+ /* Now we know how many aligned slots can contain the image. */
+ alignments = (size / CONFIG_PHYSICAL_ALIGN) + 1;
+
+ /* In the first pass, just counting all the e820 entries? */
+ if (counting) {
+ *count += alignments;
+ return 0;
+ }
+
+ /* Otherwise we're counting down to find a specific aligned slot. */
+ if (*count < alignments) {
+ /* Desired region is in this entry. */
+ return addr + (CONFIG_PHYSICAL_ALIGN * *count);
+ } else {
+ /* Desired region is beyond this entry. */
+ *count -= alignments;
+ return 0;
+ }
+}
+
+static unsigned long find_random_e820(unsigned long minimum,
+ unsigned long size)
+{
+ int i;
+ unsigned long addr, count;
+
+ /* Make sure minimum is aligned. */
+ minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
+
+ /* Verify potential e820 positions. */
+ count = 0;
+ for (i = 0; i < real_mode->e820_entries; i++) {
+ process_e820_entry(&real_mode->e820_map[i], true, minimum,
+ size, &count);
+ }
+
+ /* Handle crazy case of nothing fitting. */
+ if (count == 0)
+ return 0;
+
+ count = get_random_long() % count;
+
+ /* Select desired e820 position. */
+ for (i = 0; i < real_mode->e820_entries; i++) {
+ addr = process_e820_entry(&real_mode->e820_map[i], false,
+ minimum, size, &count);
+ if (addr)
+ return addr;
+ }
+ return 0;
+}
+
unsigned char *choose_kernel_location(unsigned char *input,
unsigned long input_size,
unsigned char *output,
@@ -118,16 +243,11 @@ unsigned char *choose_kernel_location(unsigned char *input,
choice = find_minimum_location((unsigned long)input, input_size,
(unsigned long)output, output_size);

- /* XXX: Find an appropriate E820 hole, instead of adding offset. */
- random = get_random_long();
-
- /* Clip off top of the range. */
- random &= (CONFIG_RANDOMIZE_BASE_MAX_OFFSET - 1);
- while (random + output_size > CONFIG_RANDOMIZE_BASE_MAX_OFFSET)
- random >>= 1;
-
- /* Clip off bottom of range. */
- random &= ~(choice - 1);
+ random = find_random_e820(choice, output_size);
+ if (!random) {
+ debug_putstr("KASLR could not find suitable E820 region...\n");
+ goto out;
+ }

/* Always enforce the minimum. */
if (random < choice)
--
1.7.9.5
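
The two-pass slot accounting in process_e820_entry() can be condensed into
a userspace sketch; the two RAM regions below are invented, and the second
illustrates the clamp at the maximum offset:

#include <stdio.h>

#define PHYS_ALIGN 0x200000UL
#define MAX_OFFSET 0x40000000UL /* example CONFIG_RANDOMIZE_BASE_MAX_OFFSET */
#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

/* How many aligned starting addresses inside [addr, addr+size) can hold
 * an image of image_size, honoring the minimum and MAX_OFFSET limits. */
static unsigned long slots_in(unsigned long addr, unsigned long size,
			      unsigned long minimum, unsigned long image_size)
{
	unsigned long start = ALIGN_UP(addr > minimum ? addr : minimum,
				       PHYS_ALIGN);
	unsigned long end = addr + size;

	if (end > MAX_OFFSET)
		end = MAX_OFFSET;
	if (start >= end || end - start < image_size)
		return 0;
	return (end - start - image_size) / PHYS_ALIGN + 1;
}

int main(void)
{
	/* Two hypothetical E820_RAM entries; the second starts at 4 GiB
	 * and so contributes nothing below MAX_OFFSET. */
	unsigned long count =
		slots_in(0x1000000UL, 0x3f000000UL, 0x2000000UL, 0x1400000UL) +
		slots_in(0x100000000UL, 0x40000000UL, 0x2000000UL, 0x1400000UL);

	printf("aligned slots that can hold the image: %lu\n", count);
	return 0;
}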

2013-10-03 22:24:15

by H. Peter Anvin

Subject: Re: [PATCH 3/7] x86, kaslr: find minimum safe relocation position

On 10/03/2013 01:53 PM, Kees Cook wrote:
> Examine all the known unsafe areas and avoid them by just raising the
> minimum relocation position to be past them.
>
> Signed-off-by: Kees Cook <[email protected]>

> + /* Minimum location must be above all these regions: */

This is highly problematic. The standard protocol is to hoist the
initramfs as high as possible in memory, so this may really unacceptably
restrict the available range.

It would be better to treat these the same as reserved regions in the
e820 map as far as the address space picking algorithm is concerned.

-hpa

2013-10-03 22:43:13

by Kees Cook

Subject: Re: [PATCH 3/7] x86, kaslr: find minimum safe relocation position

On Thu, Oct 3, 2013 at 3:23 PM, H. Peter Anvin <[email protected]> wrote:
> On 10/03/2013 01:53 PM, Kees Cook wrote:
>> Examine all the known unsafe areas and avoid them by just raising the
>> minimum relocation position to be past them.
>>
>> Signed-off-by: Kees Cook <[email protected]>
>
>> + /* Minimum location must be above all these regions: */
>
> This is highly problematic. The standard protocol is to hoist the
> initramfs as high as possible in memory, so this may really unacceptably
> restrict the available range.

Doesn't this depend on the boot loader's behavior?

> It would be better to treat these the same as reserved regions in the
> e820 map as far as the address space picking algorithm is concerned.

Could this be considered a future optimization, or do you feel this is
required even for this first patch series landing?

-Kees

--
Kees Cook
Chrome OS Security

2013-10-03 22:47:22

by H. Peter Anvin

Subject: Re: [PATCH 3/7] x86, kaslr: find minimum safe relocation position

On 10/03/2013 03:43 PM, Kees Cook wrote:
>> This is highly problematic. The standard protocol is to hoist the
>> initramfs as high as possible in memory, so this may really unacceptably
>> restrict the available range.
>
> Doesn't this depend on the boot loader's behavior?

It does, but the recommended (and *required* for compatibility with
older kernels) behavior is to hoist as high as possible.

>> It would be better to treat these the same as reserved regions in the
>> e820 map as far as the address space picking algorithm is concerned.
>
> Could this be considered a future optimization, or do you feel this is
> required even for this first patch series landing?

Yes, I consider it required because of the above.

-hpa

2014-01-29 12:01:44

by David Woodhouse

Subject: [PATCH] x86, boot: fix word-size assumptions in has_eflag() inline asm

Commit dd78b97367bd575918204cc89107c1479d3fc1a7 ("x86, boot: Move CPU
flags out of cpucheck") introduced ambiguous inline asm in the
has_eflag() function. We want the instruction to be 'pushfl', but we
just say 'pushf' and hope the compiler does what we wanted.

When building with 'clang -m16', it won't, because clang doesn't use
the horrid '.code16gcc' hack that even 'gcc -m16' uses internally.

Say what we mean and don't make the compiler make assumptions.

Signed-off-by: David Woodhouse <[email protected]>
---
arch/x86/boot/cpuflags.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c
index a9fcb7c..168dd25 100644
--- a/arch/x86/boot/cpuflags.c
+++ b/arch/x86/boot/cpuflags.c
@@ -32,16 +32,16 @@ int has_eflag(unsigned long mask)
{
unsigned long f0, f1;

- asm volatile("pushf \n\t"
- "pushf \n\t"
+ asm volatile("pushfl \n\t"
+ "pushfl \n\t"
"pop %0 \n\t"
"mov %0,%1 \n\t"
"xor %2,%1 \n\t"
"push %1 \n\t"
- "popf \n\t"
- "pushf \n\t"
+ "popfl \n\t"
+ "pushfl \n\t"
"pop %1 \n\t"
- "popf"
+ "popfl"
: "=&r" (f0), "=&r" (f1)
: "ri" (mask));

--
1.8.5.3
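
For context, the EFLAGS toggle test this asm implements can be run in
userspace; a sketch using the explicit 64-bit form (pushfq/popfq), assuming
an x86-64 host, with the ID bit as the example flag:

#include <stdio.h>

#define X86_EFLAGS_ID (1UL << 21) /* toggleable iff CPUID is supported */

static int has_eflag(unsigned long mask)
{
	unsigned long f0, f1;

	asm volatile("pushfq ; pushfq ; pop %0 ; mov %0,%1 ; xor %2,%1 ; "
		     "push %1 ; popfq ; pushfq ; pop %1 ; popfq"
		     : "=&r" (f0), "=&r" (f1)
		     : "ri" (mask));
	return !!((f0 ^ f1) & mask);
}

int main(void)
{
	printf("CPUID available: %d\n", has_eflag(X86_EFLAGS_ID));
	return 0;
}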




--
dwmw2



2014-01-29 16:57:42

by Kees Cook

Subject: Re: [PATCH] x86, boot: fix word-size assumptions in has_eflag() inline asm

On Wed, Jan 29, 2014 at 4:01 AM, David Woodhouse <[email protected]> wrote:
> Commit dd78b97367bd575918204cc89107c1479d3fc1a7 ("x86, boot: Move CPU
> flags out of cpucheck") introduced ambiguous inline asm in the
> has_eflag() function. We want the instruction to be 'pushfl', but we
> just say 'pushf' and hope the compiler does what we wanted.
>
> When building with 'clang -m16', it won't, because clang doesn't use
> the horrid '.code16gcc' hack that even 'gcc -m16' uses internally.
>
> Say what we mean and don't make the compiler make assumptions.
>
> Signed-off-by: David Woodhouse <[email protected]>

Yes, excellent point. Thanks!

Acked-by: Kees Cook <[email protected]>

-Kees

--
Kees Cook
Chrome OS Security

Subject: [tip:x86/build] x86, boot: Fix word-size assumptions in has_eflag() inline asm

Commit-ID: 9b3614ccfa5492f57f743f3856253f285b10a702
Gitweb: http://git.kernel.org/tip/9b3614ccfa5492f57f743f3856253f285b10a702
Author: David Woodhouse <[email protected]>
AuthorDate: Wed, 29 Jan 2014 12:01:37 +0000
Committer: H. Peter Anvin <[email protected]>
CommitDate: Wed, 29 Jan 2014 09:09:40 -0800

x86, boot: Fix word-size assumptions in has_eflag() inline asm

Commit dd78b97367bd575918204cc89107c1479d3fc1a7 ("x86, boot: Move CPU
flags out of cpucheck") introduced ambiguous inline asm in the
has_eflag() function. We want the instruction to be 'pushfl', but we
just say 'pushf' and hope the compiler does what we wanted.

When building with 'clang -m16', it won't, because clang doesn't use
the horrid '.code16gcc' hack that even 'gcc -m16' uses internally.

Say what we mean and don't make the compiler make assumptions.

[ hpa: we use plain "pushf" in the equivalent code elsewhere which may
be compiled as either 32- or 64-bit code. In those cases we want
the assembler to pick the appropriate size for us. However, this is
*16-bit* code and we still need these to be 32-bit operations. ]

Signed-off-by: David Woodhouse <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Kees Cook <[email protected]>
Signed-off-by: H. Peter Anvin <[email protected]>
---
arch/x86/boot/cpuflags.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c
index a9fcb7c..168dd25 100644
--- a/arch/x86/boot/cpuflags.c
+++ b/arch/x86/boot/cpuflags.c
@@ -32,16 +32,16 @@ int has_eflag(unsigned long mask)
{
unsigned long f0, f1;

- asm volatile("pushf \n\t"
- "pushf \n\t"
+ asm volatile("pushfl \n\t"
+ "pushfl \n\t"
"pop %0 \n\t"
"mov %0,%1 \n\t"
"xor %2,%1 \n\t"
"push %1 \n\t"
- "popf \n\t"
- "pushf \n\t"
+ "popfl \n\t"
+ "pushfl \n\t"
"pop %1 \n\t"
- "popf"
+ "popfl"
: "=&r" (f0), "=&r" (f1)
: "ri" (mask));

2014-01-30 09:09:11

by Ingo Molnar

[permalink] [raw]
Subject: Re: [tip:x86/build] x86, boot: Fix word-size assumptions in has_eflag() inline asm


* tip-bot for David Woodhouse <[email protected]> wrote:

> Commit-ID: 9b3614ccfa5492f57f743f3856253f285b10a702
> Gitweb: http://git.kernel.org/tip/9b3614ccfa5492f57f743f3856253f285b10a702
> Author: David Woodhouse <[email protected]>
> AuthorDate: Wed, 29 Jan 2014 12:01:37 +0000
> Committer: H. Peter Anvin <[email protected]>
> CommitDate: Wed, 29 Jan 2014 09:09:40 -0800
>
> x86, boot: Fix word-size assumptions in has_eflag() inline asm
>
> Commit dd78b97367bd575918204cc89107c1479d3fc1a7 ("x86, boot: Move CPU
> flags out of cpucheck") introduced ambiguous inline asm in the
> has_eflag() function. We want the instruction to be 'pushfl', but we
> just say 'pushf' and hope the compiler does what we wanted.
>
> When building with 'clang -m16', it won't, because clang doesn't use
> the horrid '.code16gcc' hack that even 'gcc -m16' uses internally.
>
> Say what we mean and don't make the compiler make assumptions.
>
> [ hpa: we use plain "pushf" in the equivalent code elsewhere which may
> be compiled as either 32- or 64-bit code. In those cases we want
> the assembler to pick the appropriate size for us. However, this is
> *16-bit* code and we still need these to be 32-bit operations. ]
>
> Signed-off-by: David Woodhouse <[email protected]>
> Link: http://lkml.kernel.org/r/[email protected]
> Signed-off-by: Kees Cook <[email protected]>
> Signed-off-by: H. Peter Anvin <[email protected]>
> ---
> arch/x86/boot/cpuflags.c | 10 +++++-----
> 1 file changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c
> index a9fcb7c..168dd25 100644
> --- a/arch/x86/boot/cpuflags.c
> +++ b/arch/x86/boot/cpuflags.c
> @@ -32,16 +32,16 @@ int has_eflag(unsigned long mask)
> {
> unsigned long f0, f1;
>
> - asm volatile("pushf \n\t"
> - "pushf \n\t"
> + asm volatile("pushfl \n\t"
> + "pushfl \n\t"
> "pop %0 \n\t"
> "mov %0,%1 \n\t"
> "xor %2,%1 \n\t"
> "push %1 \n\t"
> - "popf \n\t"
> - "pushf \n\t"
> + "popfl \n\t"
> + "pushfl \n\t"
> "pop %1 \n\t"
> - "popf"
> + "popfl"
> : "=&r" (f0), "=&r" (f1)
> : "ri" (mask));

This broke the build though:

arch/x86/boot/compressed/../cpuflags.c: Assembler messages:
arch/x86/boot/compressed/../cpuflags.c:35: Error: invalid instruction suffix for `pushf'

Thanks,

Ingo

2014-01-30 10:29:30

by Woodhouse, David

Subject: Re: [tip:x86/build] x86, boot: Fix word-size assumptions in has_eflag() inline asm

On Thu, 2014-01-30 at 10:09 +0100, Ingo Molnar wrote:
>
> This broke the build though:
>
> arch/x86/boot/compressed/../cpuflags.c: Assembler messages:
> arch/x86/boot/compressed/../cpuflags.c:35: Error: invalid instruction
> suffix for `pushf'

Ah, that only happens with CONFIG_RANDOMIZE_BASE, so I missed the fact
that 64-bit code was using the *same* inline asm. Sorry.

That 'pushfl' really does want to be 'pushfq' for the 64-bit version.
I'll look at how best to make it do the right thing...

--
David Woodhouse Open Source Technology Centre
[email protected] Intel Corporation



2014-01-30 11:00:41

by David Woodhouse

Subject: [PATCH v2] x86, boot: Fix word-size assumptions in has_eflag() inline asm

Commit dd78b97367bd575918204cc89107c1479d3fc1a7 ("x86, boot: Move CPU
flags out of cpucheck") introduced ambiguous inline asm in the
has_eflag() function. In 16-bit mode we want the instruction to be
'pushfl', but we just say 'pushf' and hope the compiler does what we
wanted.

When building with 'clang -m16', it won't, because clang doesn't use
the horrid '.code16gcc' hack that even 'gcc -m16' uses internally.

Say what we mean and don't make the compiler make assumptions.

Signed-off-by: David Woodhouse <[email protected]>
---

Let me know if you'd rather have this as an incremental patch. I would
have preferred checking for BITS_PER_LONG==64 rather than __x86_64__
but it seems we set that to 64 even when building the 16-bit code.

arch/x86/boot/cpuflags.c | 25 ++++++++++++++++++++-----
1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c
index a9fcb7c..431fa5f 100644
--- a/arch/x86/boot/cpuflags.c
+++ b/arch/x86/boot/cpuflags.c
@@ -28,20 +28,35 @@ static int has_fpu(void)
return fsw == 0 && (fcw & 0x103f) == 0x003f;
}

+/*
+ * For building the 16-bit code we want to explicitly specify 32-bit
+ * push/pop operations, rather than just saying 'pushf' or 'popf' and
+ * letting the compiler choose. But this is also included from the
+ * compressed/ directory where it may be 64-bit code, and thus needs
+ * to be 'pushfq' or 'popfq' in that case.
+ */
+#ifdef __x86_64__
+#define PUSHF "pushfq"
+#define POPF "popfq"
+#else
+#define PUSHF "pushfl"
+#define POPF "popfl"
+#endif
+
int has_eflag(unsigned long mask)
{
unsigned long f0, f1;

- asm volatile("pushf \n\t"
- "pushf \n\t"
+ asm volatile(PUSHF " \n\t"
+ PUSHF " \n\t"
"pop %0 \n\t"
"mov %0,%1 \n\t"
"xor %2,%1 \n\t"
"push %1 \n\t"
- "popf \n\t"
- "pushf \n\t"
+ POPF " \n\t"
+ PUSHF " \n\t"
"pop %1 \n\t"
- "popf"
+ POPF
: "=&r" (f0), "=&r" (f1)
: "ri" (mask));

--
1.8.5.3



--
dwmw2



2014-01-30 13:47:02

by H. Peter Anvin

Subject: Re: [PATCH v2] x86, boot: Fix word-size assumptions in has_eflag() inline asm

This would seem like a job for <asm/asm.h>.

On January 30, 2014 3:00:28 AM PST, David Woodhouse <[email protected]> wrote:
>Commit dd78b97367bd575918204cc89107c1479d3fc1a7 ("x86, boot: Move CPU
>flags out of cpucheck") introduced ambiguous inline asm in the
>has_eflag() function. In 16-bit mode we want the instruction to be
>'pushfl', but we just say 'pushf' and hope the compiler does what we
>wanted.
>
>When building with 'clang -m16', it won't, because clang doesn't use
>the horrid '.code16gcc' hack that even 'gcc -m16' uses internally.
>
>Say what we mean and don't make the compiler make assumptions.
>
>Signed-off-by: David Woodhouse <[email protected]>
>---
>
>Let me know if you'd rather have this as an incremental patch. I would
>have preferred checking for BITS_PER_LONG==64 rather than __x86_64__
>but it seems we set that to 64 even when building the 16-bit code.
>
> arch/x86/boot/cpuflags.c | 25 ++++++++++++++++++++-----
> 1 file changed, 20 insertions(+), 5 deletions(-)
>
>diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c
>index a9fcb7c..431fa5f 100644
>--- a/arch/x86/boot/cpuflags.c
>+++ b/arch/x86/boot/cpuflags.c
>@@ -28,20 +28,35 @@ static int has_fpu(void)
> return fsw == 0 && (fcw & 0x103f) == 0x003f;
> }
>
>+/*
>+ * For building the 16-bit code we want to explicitly specify 32-bit
>+ * push/pop operations, rather than just saying 'pushf' or 'popf' and
>+ * letting the compiler choose. But this is also included from the
>+ * compressed/ directory where it may be 64-bit code, and thus needs
>+ * to be 'pushfq' or 'popfq' in that case.
>+ */
>+#ifdef __x86_64__
>+#define PUSHF "pushfq"
>+#define POPF "popfq"
>+#else
>+#define PUSHF "pushfl"
>+#define POPF "popfl"
>+#endif
>+
> int has_eflag(unsigned long mask)
> {
> unsigned long f0, f1;
>
>- asm volatile("pushf \n\t"
>- "pushf \n\t"
>+ asm volatile(PUSHF " \n\t"
>+ PUSHF " \n\t"
> "pop %0 \n\t"
> "mov %0,%1 \n\t"
> "xor %2,%1 \n\t"
> "push %1 \n\t"
>- "popf \n\t"
>- "pushf \n\t"
>+ POPF " \n\t"
>+ PUSHF " \n\t"
> "pop %1 \n\t"
>- "popf"
>+ POPF
> : "=&r" (f0), "=&r" (f1)
> : "ri" (mask));
>

--
Sent from my mobile phone. Please pardon brevity and lack of formatting.

2014-01-30 14:08:31

by David Woodhouse

[permalink] [raw]
Subject: Re: [PATCH v2] x86, boot: Fix word-size assumptions in has_eflag() inline asm

On Thu, 2014-01-30 at 05:45 -0800, H. Peter Anvin wrote:
> This would seem like a job for <asm/asm.h>.

Except that's all done on CONFIG_X86_32 which isn't useful for what we
are doing here.

We could, potentially, *change* <asm/asm.h> so that it actually makes
its choices based on whether __x86_64__ is defined?

Which would probably work, except for the next horrid corner case I
haven't discovered/realised yet :)

I think I'm inclined to keep this fairly localised, unless we really see
a case for doing it elsewhere in the kernel. Which is unlikely since a
bare 'pushf' should always do the right thing except in the weird
no-man's-land that is building 16-bit code with a 32-bit compiler.

I note that BITS_PER_LONG==64 even when building the 16-bit code as part
of a 64-bit kernel, too. That one is just waiting to bite us some day...

--
dwmw2


Subject: [tip:x86/build] x86, boot: Fix word-size assumptions in has_eflag() inline asm

Commit-ID: 5fbbc25a99d680feca99a3095f0440f65d4307cc
Gitweb: http://git.kernel.org/tip/5fbbc25a99d680feca99a3095f0440f65d4307cc
Author: David Woodhouse <[email protected]>
AuthorDate: Thu, 30 Jan 2014 11:00:28 +0000
Committer: H. Peter Anvin <[email protected]>
CommitDate: Thu, 30 Jan 2014 08:04:32 -0800

x86, boot: Fix word-size assumptions in has_eflag() inline asm

Commit dd78b97367bd575918204cc89107c1479d3fc1a7 ("x86, boot: Move CPU
flags out of cpucheck") introduced ambiguous inline asm in the
has_eflag() function. In 16-bit mode we want the instruction to be
'pushfl', but we just say 'pushf' and hope the compiler does what we
wanted.

When building with 'clang -m16', it won't, because clang doesn't use
the horrid '.code16gcc' hack that even 'gcc -m16' uses internally.

Say what we mean and don't make the compiler make assumptions.

[ hpa: ideally we would be able to use the gcc %zN construct here, but
that is broken for 64-bit integers in gcc < 4.5.

The code with plain "pushf/popf" is fine for 32- or 64-bit mode, but
not for 16-bit mode; in 16-bit mode those are 16-bit instructions in
.code16 mode, and 32-bit instructions in .code16gcc mode. ]

Signed-off-by: David Woodhouse <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: H. Peter Anvin <[email protected]>
---
arch/x86/boot/cpuflags.c | 25 ++++++++++++++++++++-----
1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c
index a9fcb7c..431fa5f 100644
--- a/arch/x86/boot/cpuflags.c
+++ b/arch/x86/boot/cpuflags.c
@@ -28,20 +28,35 @@ static int has_fpu(void)
return fsw == 0 && (fcw & 0x103f) == 0x003f;
}

+/*
+ * For building the 16-bit code we want to explicitly specify 32-bit
+ * push/pop operations, rather than just saying 'pushf' or 'popf' and
+ * letting the compiler choose. But this is also included from the
+ * compressed/ directory where it may be 64-bit code, and thus needs
+ * to be 'pushfq' or 'popfq' in that case.
+ */
+#ifdef __x86_64__
+#define PUSHF "pushfq"
+#define POPF "popfq"
+#else
+#define PUSHF "pushfl"
+#define POPF "popfl"
+#endif
+
int has_eflag(unsigned long mask)
{
unsigned long f0, f1;

- asm volatile("pushf \n\t"
- "pushf \n\t"
+ asm volatile(PUSHF " \n\t"
+ PUSHF " \n\t"
"pop %0 \n\t"
"mov %0,%1 \n\t"
"xor %2,%1 \n\t"
"push %1 \n\t"
- "popf \n\t"
- "pushf \n\t"
+ POPF " \n\t"
+ PUSHF " \n\t"
"pop %1 \n\t"
- "popf"
+ POPF
: "=&r" (f0), "=&r" (f1)
: "ri" (mask));

2014-01-30 23:01:08

by David Rientjes

Subject: Re: [PATCH v2] x86, boot: Fix word-size assumptions in has_eflag() inline asm

On Thu, 30 Jan 2014, David Woodhouse wrote:

> Commit dd78b97367bd575918204cc89107c1479d3fc1a7 ("x86, boot: Move CPU
> flags out of cpucheck") introduced ambiguous inline asm in the
> has_eflag() function. In 16-bit mode we want the instruction to be
> 'pushfl', but we just say 'pushf' and hope the compiler does what we
> wanted.
>
> When building with 'clang -m16', it won't, because clang doesn't use
> the horrid '.code16gcc' hack that even 'gcc -m16' uses internally.
>
> Say what we mean and don't make the compiler make assumptions.
>
> Signed-off-by: David Woodhouse <[email protected]>

Fixes the x86-build-for-linus build error for me, thanks!