One of the easiest ways to protect the kernel from attack is to reduce
the internal attack surface exposed when a "write" flaw is available. By
making as much of the kernel read-only as possible, we reduce the
attack surface.
Many things are written to only during __init, and never changed
again. These cannot be made "const" since the compiler will do the wrong
thing (we do actually need to write to them). Instead, move these items
into a memory region that will be made read-only during mark_rodata_ro()
which happens after all kernel __init code has finished.
This introduces __ro_after_init as a way to mark such memory, and uses
it on the x86 vDSO to kill an extant kernel exploitation method. Also
adds a new kernel parameter to help debug future use and adds an lkdtm
test to check the results.
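As a rough illustration (not taken from any of the patches below; the variable
and initcall are invented), something assigned once during __init and never
changed afterwards could be annotated like this:

	/* Needs <linux/init.h> and <linux/cache.h>. */
	static unsigned long boot_mode __ro_after_init;

	static int __init choose_boot_mode(void)
	{
		/* Writable here, during __init ... */
		boot_mode = 1;
		return 0;
	}
	core_initcall(choose_boot_mode);
	/* ... and read-only once mark_rodata_ro() has run. */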
-Kees
v3:
- consolidated mark_rodata_ro()
- make CONFIG_DEBUG_RODATA always enabled on x86, mingo
- enhanced strtobool and potential callers to use "on"/"off"
- use strtobool for rodata= param, gregkh
v2:
- renamed __read_only to __ro_after_init
Instead of declaring mark_rodata_ro() separately in each architecture's
cacheflush.h, consolidate the declaration in asm-generic/cacheflush.h.
Signed-off-by: Kees Cook <[email protected]>
Cc: Russell King <[email protected]>
Cc: Catalin Marinas <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: "James E.J. Bottomley" <[email protected]>
---
arch/arm/include/asm/cacheflush.h | 1 -
arch/arm64/include/asm/cacheflush.h | 4 ----
arch/parisc/include/asm/cacheflush.h | 4 ----
arch/x86/include/asm/cacheflush.h | 1 -
include/asm-generic/cacheflush.h | 4 ++++
5 files changed, 4 insertions(+), 10 deletions(-)
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index d5525bfc7e3e..9156fc303afd 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -491,7 +491,6 @@ static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
#endif
#ifdef CONFIG_DEBUG_RODATA
-void mark_rodata_ro(void);
void set_kernel_text_rw(void);
void set_kernel_text_ro(void);
#else
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 54efedaf331f..ca3b7841e1c6 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -155,8 +155,4 @@ int set_memory_rw(unsigned long addr, int numpages);
int set_memory_x(unsigned long addr, int numpages);
int set_memory_nx(unsigned long addr, int numpages);
-#ifdef CONFIG_DEBUG_RODATA
-void mark_rodata_ro(void);
-#endif
-
#endif
diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index 845272ce9cc5..7bd69bd43a01 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -121,10 +121,6 @@ flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vma
}
}
-#ifdef CONFIG_DEBUG_RODATA
-void mark_rodata_ro(void);
-#endif
-
#include <asm/kmap_types.h>
#define ARCH_HAS_KMAP
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index e63aa38e85fb..c8cff75c5b21 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -92,7 +92,6 @@ void clflush_cache_range(void *addr, unsigned int size);
#define mmio_flush_range(addr, size) clflush_cache_range(addr, size)
#ifdef CONFIG_DEBUG_RODATA
-void mark_rodata_ro(void);
extern const int rodata_test_data;
extern int kernel_set_to_readonly;
void set_kernel_text_rw(void);
diff --git a/include/asm-generic/cacheflush.h b/include/asm-generic/cacheflush.h
index 87bc536ccde3..1225497ce3bf 100644
--- a/include/asm-generic/cacheflush.h
+++ b/include/asm-generic/cacheflush.h
@@ -31,4 +31,8 @@
#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
memcpy(dst, src, len)
+#ifdef CONFIG_DEBUG_RODATA
+void mark_rodata_ro(void);
+#endif
+
#endif /* __ASM_CACHEFLUSH_H */
--
1.9.1
Several places in the kernel expect to use "on" and "off" for their
boolean signifiers, so add them to strtobool.
Signed-off-by: Kees Cook <[email protected]>
Cc: Rasmus Villemoes <[email protected]>
Cc: Daniel Borkmann <[email protected]>
---
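A usage sketch (the parameter name is invented, not part of this series): a
__setup handler that previously open-coded the "on"/"off" comparison can now
just call strtobool(), which accepts "y"/"Y"/"1", "n"/"N"/"0", and now
"on"/"off" in either case:

	static bool frobnicate_enabled __read_mostly = true;

	static int __init setup_frobnicate(char *str)
	{
		/* strtobool() returns 0 on a match, -EINVAL otherwise. */
		return strtobool(str, &frobnicate_enabled);
	}
	__setup("frobnicate=", setup_frobnicate);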
lib/string.c | 21 ++++++++++++++++++++-
1 file changed, 20 insertions(+), 1 deletion(-)
diff --git a/lib/string.c b/lib/string.c
index 0323c0d5629a..d7550432f91c 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -635,12 +635,16 @@ EXPORT_SYMBOL(sysfs_streq);
* @s: input string
* @res: result
*
- * This routine returns 0 iff the first character is one of 'Yy1Nn0'.
+ * This routine returns 0 iff the first character is one of 'Yy1Nn0', or
+ * 'Oo' when the second character is one of 'fFnN' (for "on" and "off").
* Otherwise it will return -EINVAL. Value pointed to by res is
* updated upon finding a match.
*/
int strtobool(const char *s, bool *res)
{
+ if (!s)
+ return -EINVAL;
+
switch (s[0]) {
case 'y':
case 'Y':
@@ -652,6 +656,21 @@ int strtobool(const char *s, bool *res)
case '0':
*res = false;
break;
+ case 'o':
+ case 'O':
+ switch (s[1]) {
+ case 'n':
+ case 'N':
+ *res = true;
+ break;
+ case 'f':
+ case 'F':
+ *res = false;
+ break;
+ default:
+ return -EINVAL;
+ }
+ break;
default:
return -EINVAL;
}
--
1.9.1
This changes several users of manual "on"/"off" parsing to use strtobool.
Signed-off-by: Kees Cook <[email protected]>
---
arch/powerpc/kernel/rtasd.c | 10 +++-------
arch/powerpc/platforms/pseries/hotplug-cpu.c | 11 +++--------
arch/s390/kernel/time.c | 8 ++------
arch/s390/kernel/topology.c | 8 +++-----
arch/x86/kernel/aperture_64.c | 13 +++----------
kernel/time/hrtimer.c | 11 +++--------
kernel/time/tick-sched.c | 11 +++--------
7 files changed, 20 insertions(+), 52 deletions(-)
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index 5a2c049c1c61..984e67e91ba3 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -21,6 +21,7 @@
#include <linux/cpu.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
+#include <linux/string.h>
#include <asm/uaccess.h>
#include <asm/io.h>
@@ -49,7 +50,7 @@ static unsigned int rtas_error_log_buffer_max;
static unsigned int event_scan;
static unsigned int rtas_event_scan_rate;
-static int full_rtas_msgs = 0;
+static bool full_rtas_msgs;
/* Stop logging to nvram after first fatal error */
static int logging_enabled; /* Until we initialize everything,
@@ -592,11 +593,6 @@ __setup("surveillance=", surveillance_setup);
static int __init rtasmsgs_setup(char *str)
{
- if (strcmp(str, "on") == 0)
- full_rtas_msgs = 1;
- else if (strcmp(str, "off") == 0)
- full_rtas_msgs = 0;
-
- return 1;
+ return strtobool(str, &full_rtas_msgs);
}
__setup("rtasmsgs=", rtasmsgs_setup);
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 62475440fd45..ffed89818d39 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -24,6 +24,7 @@
#include <linux/sched.h> /* for idle_task_exit */
#include <linux/cpu.h>
#include <linux/of.h>
+#include <linux/string.h>
#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/firmware.h>
@@ -43,20 +44,14 @@ static DEFINE_PER_CPU(enum cpu_state_vals, current_state) = CPU_STATE_OFFLINE;
static enum cpu_state_vals default_offline_state = CPU_STATE_OFFLINE;
-static int cede_offline_enabled __read_mostly = 1;
+static bool cede_offline_enabled __read_mostly = true;
/*
* Enable/disable cede_offline when available.
*/
static int __init setup_cede_offline(char *str)
{
- if (!strcmp(str, "off"))
- cede_offline_enabled = 0;
- else if (!strcmp(str, "on"))
- cede_offline_enabled = 1;
- else
- return 0;
- return 1;
+ return strtobool(str, &cede_offline_enabled);
}
__setup("cede_offline=", setup_cede_offline);
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 99f84ac31307..afc7fc9684ba 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -1433,7 +1433,7 @@ device_initcall(etr_init_sysfs);
/*
* Server Time Protocol (STP) code.
*/
-static int stp_online;
+static bool stp_online;
static struct stp_sstpi stp_info;
static void *stp_page;
@@ -1444,11 +1444,7 @@ static struct timer_list stp_timer;
static int __init early_parse_stp(char *p)
{
- if (strncmp(p, "off", 3) == 0)
- stp_online = 0;
- else if (strncmp(p, "on", 2) == 0)
- stp_online = 1;
- return 0;
+ return strtobool(p, &stp_online);
}
early_param("stp", early_parse_stp);
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 40b8102fdadb..10e388216307 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -15,6 +15,7 @@
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/slab.h>
+#include <linux/string.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/mm.h>
@@ -37,7 +38,7 @@ static void set_topology_timer(void);
static void topology_work_fn(struct work_struct *work);
static struct sysinfo_15_1_x *tl_info;
-static int topology_enabled = 1;
+static bool topology_enabled = true;
static DECLARE_WORK(topology_work, topology_work_fn);
/*
@@ -444,10 +445,7 @@ static const struct cpumask *cpu_book_mask(int cpu)
static int __init early_parse_topology(char *p)
{
- if (strncmp(p, "off", 3))
- return 0;
- topology_enabled = 0;
- return 0;
+ return strtobool(p, &topology_enabled);
}
early_param("topology", early_parse_topology);
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 6e85f713641d..6608b00a516a 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -20,6 +20,7 @@
#include <linux/pci_ids.h>
#include <linux/pci.h>
#include <linux/bitops.h>
+#include <linux/string.h>
#include <linux/suspend.h>
#include <asm/e820.h>
#include <asm/io.h>
@@ -227,19 +228,11 @@ static u32 __init search_agp_bridge(u32 *order, int *valid_agp)
return 0;
}
-static int gart_fix_e820 __initdata = 1;
+static bool gart_fix_e820 __initdata = true;
static int __init parse_gart_mem(char *p)
{
- if (!p)
- return -EINVAL;
-
- if (!strncmp(p, "off", 3))
- gart_fix_e820 = 0;
- else if (!strncmp(p, "on", 2))
- gart_fix_e820 = 1;
-
- return 0;
+ return strtobool(p, &gart_fix_e820);
}
early_param("gart_fix_e820", parse_gart_mem);
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 435b8850dd80..40d82fe4d2a5 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -39,6 +39,7 @@
#include <linux/syscalls.h>
#include <linux/kallsyms.h>
#include <linux/interrupt.h>
+#include <linux/string.h>
#include <linux/tick.h>
#include <linux/seq_file.h>
#include <linux/err.h>
@@ -515,7 +516,7 @@ static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
/*
* High resolution timer enabled ?
*/
-static int hrtimer_hres_enabled __read_mostly = 1;
+static bool hrtimer_hres_enabled __read_mostly = true;
unsigned int hrtimer_resolution __read_mostly = LOW_RES_NSEC;
EXPORT_SYMBOL_GPL(hrtimer_resolution);
@@ -524,13 +525,7 @@ EXPORT_SYMBOL_GPL(hrtimer_resolution);
*/
static int __init setup_hrtimer_hres(char *str)
{
- if (!strcmp(str, "off"))
- hrtimer_hres_enabled = 0;
- else if (!strcmp(str, "on"))
- hrtimer_hres_enabled = 1;
- else
- return 0;
- return 1;
+ return strtobool(str, &hrtimer_hres_enabled);
}
__setup("highres=", setup_hrtimer_hres);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 7c7ec4515983..dad2b3bec58b 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -19,6 +19,7 @@
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
+#include <linux/string.h>
#include <linux/module.h>
#include <linux/irq_work.h>
#include <linux/posix-timers.h>
@@ -387,20 +388,14 @@ void __init tick_nohz_init(void)
/*
* NO HZ enabled ?
*/
-static int tick_nohz_enabled __read_mostly = 1;
+static bool tick_nohz_enabled __read_mostly = true;
unsigned long tick_nohz_active __read_mostly;
/*
* Enable / Disable tickless mode
*/
static int __init setup_tick_nohz(char *str)
{
- if (!strcmp(str, "off"))
- tick_nohz_enabled = 0;
- else if (!strcmp(str, "on"))
- tick_nohz_enabled = 1;
- else
- return 0;
- return 1;
+ return strtobool(str, &tick_nohz_enabled);
}
__setup("nohz=", setup_tick_nohz);
--
1.9.1
It may be useful to debug writes to the readonly sections of memory,
so provide a cmdline "rodata=off" to allow for this. This can be
expanded in the future to support "log" and "write" modes, but that
will need to be architecture-specific.
Suggested-by: H. Peter Anvin <[email protected]>
Signed-off-by: Kees Cook <[email protected]>
---
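For reference, the switch is just another kernel command line parameter; an
illustrative (made-up) boot line would be:

	... root=/dev/sda1 ro quiet rodata=off

With rodata=off, mark_readonly() skips mark_rodata_ro() and prints "Kernel
memory protection disabled." instead.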
Documentation/kernel-parameters.txt | 4 ++++
init/main.c | 27 +++++++++++++++++++++++----
kernel/debug/kdb/kdb_bp.c | 4 +---
3 files changed, 28 insertions(+), 7 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 742f69d18fc8..21cf76dbba90 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -3409,6 +3409,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
ro [KNL] Mount root device read-only on boot
+ rodata= [KNL]
+ on Mark read-only kernel memory as read-only (default).
+ off Leave read-only kernel memory writable for debugging.
+
root= [KNL] Root filesystem
See name_to_dev_t comment in init/do_mounts.c.
diff --git a/init/main.c b/init/main.c
index 9e64d7097f1a..fbafa271531c 100644
--- a/init/main.c
+++ b/init/main.c
@@ -93,9 +93,6 @@ static int kernel_init(void *);
extern void init_IRQ(void);
extern void fork_init(void);
extern void radix_tree_init(void);
-#ifndef CONFIG_DEBUG_RODATA
-static inline void mark_rodata_ro(void) { }
-#endif
/*
* Debug helper: via this flag we know that we are in 'early bootup code'
@@ -929,6 +926,28 @@ static int try_to_run_init_process(const char *init_filename)
static noinline void __init kernel_init_freeable(void);
+#ifdef CONFIG_DEBUG_RODATA
+static bool rodata_enabled = true;
+static int __init set_debug_rodata(char *str)
+{
+ return strtobool(str, &rodata_enabled);
+}
+__setup("rodata=", set_debug_rodata);
+
+static void mark_readonly(void)
+{
+ if (rodata_enabled)
+ mark_rodata_ro();
+ else
+ pr_info("Kernel memory protection disabled.\n");
+}
+#else
+static inline void mark_readonly(void)
+{
+ pr_warn("This architecture does not have kernel memory protection.\n");
+}
+#endif
+
static int __ref kernel_init(void *unused)
{
int ret;
@@ -937,7 +956,7 @@ static int __ref kernel_init(void *unused)
/* need to finish all async __init code before freeing the memory */
async_synchronize_full();
free_initmem();
- mark_rodata_ro();
+ mark_readonly();
system_state = SYSTEM_RUNNING;
numa_default_policy();
diff --git a/kernel/debug/kdb/kdb_bp.c b/kernel/debug/kdb/kdb_bp.c
index e1dbf4a2c69e..90ff129c88a2 100644
--- a/kernel/debug/kdb/kdb_bp.c
+++ b/kernel/debug/kdb/kdb_bp.c
@@ -153,13 +153,11 @@ static int _kdb_bp_install(struct pt_regs *regs, kdb_bp_t *bp)
} else {
kdb_printf("%s: failed to set breakpoint at 0x%lx\n",
__func__, bp->bp_addr);
-#ifdef CONFIG_DEBUG_RODATA
if (!bp->bp_type) {
kdb_printf("Software breakpoints are unavailable.\n"
- " Change the kernel CONFIG_DEBUG_RODATA=n\n"
+ " Boot the kernel with rodata=off\n"
" OR use hw breaks: help bph\n");
}
-#endif
return 1;
}
return 0;
--
1.9.1
This removes the CONFIG_DEBUG_RODATA option and makes it always enabled.
Suggested-by: Ingo Molnar <[email protected]>
Signed-off-by: Kees Cook <[email protected]>
---
arch/x86/Kconfig | 3 +++
arch/x86/Kconfig.debug | 18 +++---------------
arch/x86/include/asm/cacheflush.h | 5 -----
arch/x86/include/asm/kvm_para.h | 7 -------
arch/x86/include/asm/sections.h | 2 +-
arch/x86/kernel/ftrace.c | 6 +++---
arch/x86/kernel/kgdb.c | 8 ++------
arch/x86/kernel/test_nx.c | 2 --
arch/x86/kernel/test_rodata.c | 2 +-
arch/x86/kernel/vmlinux.lds.S | 25 +++++++++++--------------
arch/x86/mm/init_32.c | 3 ---
arch/x86/mm/init_64.c | 3 ---
arch/x86/mm/pageattr.c | 2 +-
13 files changed, 25 insertions(+), 61 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index db3622f22b61..f7c69436accc 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -289,6 +289,9 @@ config ARCH_SUPPORTS_UPROBES
config FIX_EARLYCON_MEM
def_bool y
+config DEBUG_RODATA
+ def_bool y
+
config PGTABLE_LEVELS
int
default 4 if X86_64
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 137dfa96aa14..1f6c306a9a00 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -91,28 +91,16 @@ config EFI_PGT_DUMP
issues with the mapping of the EFI runtime regions into that
table.
-config DEBUG_RODATA
- bool "Write protect kernel read-only data structures"
- default y
- depends on DEBUG_KERNEL
- ---help---
- Mark the kernel read-only data as write-protected in the pagetables,
- in order to catch accidental (and incorrect) writes to such const
- data. This is recommended so that we can catch kernel bugs sooner.
- If in doubt, say "Y".
-
config DEBUG_RODATA_TEST
- bool "Testcase for the DEBUG_RODATA feature"
- depends on DEBUG_RODATA
+ bool "Testcase for the marking rodata read-only"
default y
---help---
- This option enables a testcase for the DEBUG_RODATA
- feature as well as for the change_page_attr() infrastructure.
+ This option enables a testcase for the setting rodata read-only
+ as well as for the change_page_attr() infrastructure.
If in doubt, say "N"
config DEBUG_WX
bool "Warn on W+X mappings at boot"
- depends on DEBUG_RODATA
select X86_PTDUMP_CORE
---help---
Generate a warning if any W+X mappings are found at boot.
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index c8cff75c5b21..61518cf79437 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -91,15 +91,10 @@ void clflush_cache_range(void *addr, unsigned int size);
#define mmio_flush_range(addr, size) clflush_cache_range(addr, size)
-#ifdef CONFIG_DEBUG_RODATA
extern const int rodata_test_data;
extern int kernel_set_to_readonly;
void set_kernel_text_rw(void);
void set_kernel_text_ro(void);
-#else
-static inline void set_kernel_text_rw(void) { }
-static inline void set_kernel_text_ro(void) { }
-#endif
#ifdef CONFIG_DEBUG_RODATA_TEST
int rodata_test(void);
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index c1adf33fdd0d..bc62e7cbf1b1 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -17,15 +17,8 @@ static inline bool kvm_check_and_clear_guest_paused(void)
}
#endif /* CONFIG_KVM_GUEST */
-#ifdef CONFIG_DEBUG_RODATA
#define KVM_HYPERCALL \
ALTERNATIVE(".byte 0x0f,0x01,0xc1", ".byte 0x0f,0x01,0xd9", X86_FEATURE_VMMCALL)
-#else
-/* On AMD processors, vmcall will generate a trap that we will
- * then rewrite to the appropriate instruction.
- */
-#define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1"
-#endif
/* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall
* instruction. The hypervisor may replace it with something else but only the
diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h
index 0a5242428659..13b6cdd0af57 100644
--- a/arch/x86/include/asm/sections.h
+++ b/arch/x86/include/asm/sections.h
@@ -7,7 +7,7 @@
extern char __brk_base[], __brk_limit[];
extern struct exception_table_entry __stop___ex_table[];
-#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
+#if defined(CONFIG_X86_64)
extern char __end_rodata_hpage_align[];
#endif
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 311bcf338f07..eb6bd34582c6 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -81,9 +81,9 @@ within(unsigned long addr, unsigned long start, unsigned long end)
static unsigned long text_ip_addr(unsigned long ip)
{
/*
- * On x86_64, kernel text mappings are mapped read-only with
- * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead
- * of the kernel text mapping to modify the kernel text.
+ * On x86_64, kernel text mappings are mapped read-only, so we use
+ * the kernel identity mapping instead of the kernel text mapping
+ * to modify the kernel text.
*
* For 32bit kernels, these mappings are same and we can use
* kernel identity mapping to modify code.
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 44256a62702b..ed15cd486d06 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -750,9 +750,7 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip)
int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
{
int err;
-#ifdef CONFIG_DEBUG_RODATA
char opc[BREAK_INSTR_SIZE];
-#endif /* CONFIG_DEBUG_RODATA */
bpt->type = BP_BREAKPOINT;
err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,
@@ -761,7 +759,6 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
return err;
err = probe_kernel_write((char *)bpt->bpt_addr,
arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE);
-#ifdef CONFIG_DEBUG_RODATA
if (!err)
return err;
/*
@@ -778,13 +775,12 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
if (memcmp(opc, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE))
return -EINVAL;
bpt->type = BP_POKE_BREAKPOINT;
-#endif /* CONFIG_DEBUG_RODATA */
+
return err;
}
int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
{
-#ifdef CONFIG_DEBUG_RODATA
int err;
char opc[BREAK_INSTR_SIZE];
@@ -801,8 +797,8 @@ int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
if (err || memcmp(opc, bpt->saved_instr, BREAK_INSTR_SIZE))
goto knl_write;
return err;
+
knl_write:
-#endif /* CONFIG_DEBUG_RODATA */
return probe_kernel_write((char *)bpt->bpt_addr,
(char *)bpt->saved_instr, BREAK_INSTR_SIZE);
}
diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c
index 3f92ce07e525..27538f183c3b 100644
--- a/arch/x86/kernel/test_nx.c
+++ b/arch/x86/kernel/test_nx.c
@@ -142,7 +142,6 @@ static int test_NX(void)
* by the error message
*/
-#ifdef CONFIG_DEBUG_RODATA
/* Test 3: Check if the .rodata section is executable */
if (rodata_test_data != 0xC3) {
printk(KERN_ERR "test_nx: .rodata marker has invalid value\n");
@@ -151,7 +150,6 @@ static int test_NX(void)
printk(KERN_ERR "test_nx: .rodata section is executable\n");
ret = -ENODEV;
}
-#endif
#if 0
/* Test 4: Check if the .data section of a module is executable */
diff --git a/arch/x86/kernel/test_rodata.c b/arch/x86/kernel/test_rodata.c
index 5ecbfe5099da..cb4a01b41e27 100644
--- a/arch/x86/kernel/test_rodata.c
+++ b/arch/x86/kernel/test_rodata.c
@@ -76,5 +76,5 @@ int rodata_test(void)
}
MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Testcase for the DEBUG_RODATA infrastructure");
+MODULE_DESCRIPTION("Testcase for marking rodata as read-only");
MODULE_AUTHOR("Arjan van de Ven <[email protected]>");
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 74e4bf11f562..fe133b710bef 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -41,29 +41,28 @@ ENTRY(phys_startup_64)
jiffies_64 = jiffies;
#endif
-#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
+#if defined(CONFIG_X86_64)
/*
- * On 64-bit, align RODATA to 2MB so that even with CONFIG_DEBUG_RODATA
- * we retain large page mappings for boundaries spanning kernel text, rodata
- * and data sections.
+ * On 64-bit, align RODATA to 2MB so we retain large page mappings for
+ * boundaries spanning kernel text, rodata and data sections.
*
* However, kernel identity mappings will have different RWX permissions
* to the pages mapping to text and to the pages padding (which are freed) the
* text section. Hence kernel identity mappings will be broken to smaller
* pages. For 64-bit, kernel text and kernel identity mappings are different,
- * so we can enable protection checks that come with CONFIG_DEBUG_RODATA,
- * as well as retain 2MB large page mappings for kernel text.
+ * so we can enable protection checks as well as retain 2MB large page
+ * mappings for kernel text.
*/
-#define X64_ALIGN_DEBUG_RODATA_BEGIN . = ALIGN(HPAGE_SIZE);
+#define X64_ALIGN_RODATA_BEGIN . = ALIGN(HPAGE_SIZE);
-#define X64_ALIGN_DEBUG_RODATA_END \
+#define X64_ALIGN_RODATA_END \
. = ALIGN(HPAGE_SIZE); \
__end_rodata_hpage_align = .;
#else
-#define X64_ALIGN_DEBUG_RODATA_BEGIN
-#define X64_ALIGN_DEBUG_RODATA_END
+#define X64_ALIGN_RODATA_BEGIN
+#define X64_ALIGN_RODATA_END
#endif
@@ -112,13 +111,11 @@ SECTIONS
EXCEPTION_TABLE(16) :text = 0x9090
-#if defined(CONFIG_DEBUG_RODATA)
/* .text should occupy whole number of pages */
. = ALIGN(PAGE_SIZE);
-#endif
- X64_ALIGN_DEBUG_RODATA_BEGIN
+ X64_ALIGN_RODATA_BEGIN
RO_DATA(PAGE_SIZE)
- X64_ALIGN_DEBUG_RODATA_END
+ X64_ALIGN_RODATA_END
/* Data */
.data : AT(ADDR(.data) - LOAD_OFFSET) {
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index cb4ef3de61f9..2ebfbaf61142 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -871,7 +871,6 @@ static noinline int do_test_wp_bit(void)
return flag;
}
-#ifdef CONFIG_DEBUG_RODATA
const int rodata_test_data = 0xC3;
EXPORT_SYMBOL_GPL(rodata_test_data);
@@ -960,5 +959,3 @@ void mark_rodata_ro(void)
if (__supported_pte_mask & _PAGE_NX)
debug_checkwx();
}
-#endif
-
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index ec081fe0ce2c..e08d141844ee 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1062,7 +1062,6 @@ void __init mem_init(void)
mem_init_print_info(NULL);
}
-#ifdef CONFIG_DEBUG_RODATA
const int rodata_test_data = 0xC3;
EXPORT_SYMBOL_GPL(rodata_test_data);
@@ -1154,8 +1153,6 @@ void mark_rodata_ro(void)
debug_checkwx();
}
-#endif
-
int kern_addr_valid(unsigned long addr)
{
unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index a3137a4feed1..8d77c7f7a614 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -278,7 +278,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
__pa_symbol(__end_rodata) >> PAGE_SHIFT))
pgprot_val(forbidden) |= _PAGE_RW;
-#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
+#if defined(CONFIG_X86_64)
/*
* Once the kernel maps the text as RO (kernel_set_to_readonly is set),
* kernel text mappings for the large page aligned text, rodata sections
--
1.9.1
One of the easiest ways to protect the kernel from attack is to reduce
the internal attack surface exposed when a "write" flaw is available. By
making as much of the kernel read-only as possible, we reduce the
attack surface.
Many things are written to only during __init, and never changed
again. These cannot be made "const" since the compiler will do the wrong
thing (we do actually need to write to them). Instead, move these items
into a memory region that will be made read-only during mark_rodata_ro()
which happens after all kernel __init code has finished.
This introduces __ro_after_init as a way to mark such memory, and adds
some documentation about the existing __read_mostly marking.
Based on work by PaX Team and Brad Spengler.
Signed-off-by: Kees Cook <[email protected]>
---
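A minimal sketch of the intended pairing with an early parameter (the
parameter and variable names are invented): the value cannot be "const"
because it is written by an __init-time parser, but it never changes after
boot, so __ro_after_init fits:

	static unsigned long cache_limit __ro_after_init = 4096;

	static int __init parse_cache_limit(char *str)
	{
		/* Runs during early __init, before mark_rodata_ro(). */
		return kstrtoul(str, 0, &cache_limit);
	}
	early_param("cache_limit", parse_cache_limit);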
arch/parisc/include/asm/cache.h | 3 +++
include/asm-generic/vmlinux.lds.h | 1 +
include/linux/cache.h | 14 ++++++++++++++
3 files changed, 18 insertions(+)
diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h
index 3d0e17bcc8e9..df0f52bd18b4 100644
--- a/arch/parisc/include/asm/cache.h
+++ b/arch/parisc/include/asm/cache.h
@@ -22,6 +22,9 @@
#define __read_mostly __attribute__((__section__(".data..read_mostly")))
+/* Read-only memory is marked before mark_rodata_ro() is called. */
+#define __ro_after_init __read_mostly
+
void parisc_cache_init(void); /* initializes cache-flushing */
void disable_sr_hashing_asm(int); /* low level support for above */
void disable_sr_hashing(void); /* turns off space register hashing */
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index c4bd0e2c173c..772c784ba763 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -256,6 +256,7 @@
.rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start_rodata) = .; \
*(.rodata) *(.rodata.*) \
+ *(.data..ro_after_init) /* Read only after init */ \
*(__vermagic) /* Kernel version magic */ \
. = ALIGN(8); \
VMLINUX_SYMBOL(__start___tracepoints_ptrs) = .; \
diff --git a/include/linux/cache.h b/include/linux/cache.h
index 17e7e82d2aa7..1be04f8c563a 100644
--- a/include/linux/cache.h
+++ b/include/linux/cache.h
@@ -12,10 +12,24 @@
#define SMP_CACHE_BYTES L1_CACHE_BYTES
#endif
+/*
+ * __read_mostly is used to keep rarely changing variables out of frequently
+ * updated cachelines. If an architecture doesn't support it, ignore the
+ * hint.
+ */
#ifndef __read_mostly
#define __read_mostly
#endif
+/*
+ * __ro_after_init is used to mark things that are read-only after init (i.e.
+ * after mark_rodata_ro() has been called). These are effectively read-only,
+ * but may get written to during init, so can't live in .rodata (via "const").
+ */
+#ifndef __ro_after_init
+#define __ro_after_init __attribute__((__section__(".data..ro_after_init")))
+#endif
+
#ifndef ____cacheline_aligned
#define ____cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES)))
#endif
--
1.9.1
The new __ro_after_init section should be writable during init, but not
afterwards. Validate that it gets updated during init and can't be written
to afterwards.
Signed-off-by: Kees Cook <[email protected]>
---
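For reference, once applied the new case can be exercised through lkdtm's
existing debugfs interface, e.g.:

	echo WRITE_RO_AFTER_INIT > /sys/kernel/debug/provoke-crash/DIRECT

A successful test Oopses on the write; if the value was somehow not updated
during init, the test only logs that fact and skips the write.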
drivers/misc/lkdtm.c | 29 ++++++++++++++++++++++++++---
1 file changed, 26 insertions(+), 3 deletions(-)
diff --git a/drivers/misc/lkdtm.c b/drivers/misc/lkdtm.c
index 11fdadc68e53..2a6eaf1122b4 100644
--- a/drivers/misc/lkdtm.c
+++ b/drivers/misc/lkdtm.c
@@ -103,6 +103,7 @@ enum ctype {
CT_EXEC_USERSPACE,
CT_ACCESS_USERSPACE,
CT_WRITE_RO,
+ CT_WRITE_RO_AFTER_INIT,
CT_WRITE_KERN,
};
@@ -140,6 +141,7 @@ static char* cp_type[] = {
"EXEC_USERSPACE",
"ACCESS_USERSPACE",
"WRITE_RO",
+ "WRITE_RO_AFTER_INIT",
"WRITE_KERN",
};
@@ -162,6 +164,7 @@ static DEFINE_SPINLOCK(lock_me_up);
static u8 data_area[EXEC_SIZE];
static const unsigned long rodata = 0xAA55AA55;
+static unsigned long ro_after_init __ro_after_init = 0x55AA5500;
module_param(recur_count, int, 0644);
MODULE_PARM_DESC(recur_count, " Recursion level for the stack overflow test");
@@ -503,11 +506,28 @@ static void lkdtm_do_action(enum ctype which)
break;
}
case CT_WRITE_RO: {
- unsigned long *ptr;
+ /* Explicitly cast away "const" for the test. */
+ unsigned long *ptr = (unsigned long *)&rodata;
- ptr = (unsigned long *)&rodata;
+ pr_info("attempting bad rodata write at %p\n", ptr);
+ *ptr ^= 0xabcd1234;
- pr_info("attempting bad write at %p\n", ptr);
+ break;
+ }
+ case CT_WRITE_RO_AFTER_INIT: {
+ unsigned long *ptr = &ro_after_init;
+
+ /*
+ * Verify we were written to during init. Since an Oops
+ * is considered a "success", a failure is to just skip the
+ * real test.
+ */
+ if ((*ptr & 0xAA) != 0xAA) {
+ pr_info("%p was NOT written during init!?\n", ptr);
+ break;
+ }
+
+ pr_info("attempting bad ro_after_init write at %p\n", ptr);
*ptr ^= 0xabcd1234;
break;
@@ -817,6 +837,9 @@ static int __init lkdtm_module_init(void)
int n_debugfs_entries = 1; /* Assume only the direct entry */
int i;
+ /* Make sure we can write to __ro_after_init values during __init */
+ ro_after_init |= 0xAA;
+
/* Register debugfs interface */
lkdtm_debugfs_root = debugfs_create_dir("provoke-crash", NULL);
if (!lkdtm_debugfs_root) {
--
1.9.1
The vDSO does not need to be writable after __init, so mark it as
__ro_after_init. The result kills the exploit method of writing to the
vDSO from kernel space resulting in userspace executing the modified code,
as shown here to bypass SMEP restrictions: http://itszn.com/blog/?p=21
The memory map (with added vDSO address reporting) shows the vDSO moving
into read-only memory:
Before:
[ 0.143067] vDSO @ ffffffff82004000
[ 0.143551] vDSO @ ffffffff82006000
---[ High Kernel Mapping ]---
0xffffffff80000000-0xffffffff81000000 16M pmd
0xffffffff81000000-0xffffffff81800000 8M ro PSE GLB x pmd
0xffffffff81800000-0xffffffff819f3000 1996K ro GLB x pte
0xffffffff819f3000-0xffffffff81a00000 52K ro NX pte
0xffffffff81a00000-0xffffffff81e00000 4M ro PSE GLB NX pmd
0xffffffff81e00000-0xffffffff81e05000 20K ro GLB NX pte
0xffffffff81e05000-0xffffffff82000000 2028K ro NX pte
0xffffffff82000000-0xffffffff8214f000 1340K RW GLB NX pte
0xffffffff8214f000-0xffffffff82281000 1224K RW NX pte
0xffffffff82281000-0xffffffff82400000 1532K RW GLB NX pte
0xffffffff82400000-0xffffffff83200000 14M RW PSE GLB NX pmd
0xffffffff83200000-0xffffffffc0000000 974M pmd
After:
[ 0.145062] vDSO @ ffffffff81da1000
[ 0.146057] vDSO @ ffffffff81da4000
---[ High Kernel Mapping ]---
0xffffffff80000000-0xffffffff81000000 16M pmd
0xffffffff81000000-0xffffffff81800000 8M ro PSE GLB x pmd
0xffffffff81800000-0xffffffff819f3000 1996K ro GLB x pte
0xffffffff819f3000-0xffffffff81a00000 52K ro NX pte
0xffffffff81a00000-0xffffffff81e00000 4M ro PSE GLB NX pmd
0xffffffff81e00000-0xffffffff81e0b000 44K ro GLB NX pte
0xffffffff81e0b000-0xffffffff82000000 2004K ro NX pte
0xffffffff82000000-0xffffffff8214c000 1328K RW GLB NX pte
0xffffffff8214c000-0xffffffff8227e000 1224K RW NX pte
0xffffffff8227e000-0xffffffff82400000 1544K RW GLB NX pte
0xffffffff82400000-0xffffffff83200000 14M RW PSE GLB NX pmd
0xffffffff83200000-0xffffffffc0000000 974M pmd
Based on work by PaX Team and Brad Spengler.
Signed-off-by: Kees Cook <[email protected]>
---
arch/x86/entry/vdso/vdso2c.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h
index 0224987556ce..eb93a3137ed2 100644
--- a/arch/x86/entry/vdso/vdso2c.h
+++ b/arch/x86/entry/vdso/vdso2c.h
@@ -140,7 +140,7 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
fprintf(outfile, "#include <asm/vdso.h>\n");
fprintf(outfile, "\n");
fprintf(outfile,
- "static unsigned char raw_data[%lu] __page_aligned_data = {",
+ "static unsigned char raw_data[%lu] __ro_after_init __aligned(PAGE_SIZE) = {",
mapping_size);
for (j = 0; j < stripped_len; j++) {
if (j % 10 == 0)
@@ -150,7 +150,7 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
}
fprintf(outfile, "\n};\n\n");
- fprintf(outfile, "static struct page *pages[%lu];\n\n",
+ fprintf(outfile, "static struct page *pages[%lu] __ro_after_init;\n\n",
mapping_size / 4096);
fprintf(outfile, "const struct vdso_image %s = {\n", name);
--
1.9.1
On Wed, Dec 9, 2015 at 1:43 PM, Kees Cook <[email protected]> wrote:
> The vDSO does not need to be writable after __init, so mark it as
> __ro_after_init. The result kills the exploit method of writing to the
> vDSO from kernel space resulting in userspace executing the modified code,
> as shown here to bypass SMEP restrictions: http://itszn.com/blog/?p=21
>
> The memory map (with added vDSO address reporting) shows the vDSO moving
> into read-only memory:
>
> Before:
> [ 0.143067] vDSO @ ffffffff82004000
> [ 0.143551] vDSO @ ffffffff82006000
> ---[ High Kernel Mapping ]---
> 0xffffffff80000000-0xffffffff81000000 16M pmd
> 0xffffffff81000000-0xffffffff81800000 8M ro PSE GLB x pmd
> 0xffffffff81800000-0xffffffff819f3000 1996K ro GLB x pte
> 0xffffffff819f3000-0xffffffff81a00000 52K ro NX pte
> 0xffffffff81a00000-0xffffffff81e00000 4M ro PSE GLB NX pmd
> 0xffffffff81e00000-0xffffffff81e05000 20K ro GLB NX pte
> 0xffffffff81e05000-0xffffffff82000000 2028K ro NX pte
> 0xffffffff82000000-0xffffffff8214f000 1340K RW GLB NX pte
> 0xffffffff8214f000-0xffffffff82281000 1224K RW NX pte
> 0xffffffff82281000-0xffffffff82400000 1532K RW GLB NX pte
> 0xffffffff82400000-0xffffffff83200000 14M RW PSE GLB NX pmd
> 0xffffffff83200000-0xffffffffc0000000 974M pmd
>
> After:
> [ 0.145062] vDSO @ ffffffff81da1000
> [ 0.146057] vDSO @ ffffffff81da4000
> ---[ High Kernel Mapping ]---
> 0xffffffff80000000-0xffffffff81000000 16M pmd
> 0xffffffff81000000-0xffffffff81800000 8M ro PSE GLB x pmd
> 0xffffffff81800000-0xffffffff819f3000 1996K ro GLB x pte
> 0xffffffff819f3000-0xffffffff81a00000 52K ro NX pte
> 0xffffffff81a00000-0xffffffff81e00000 4M ro PSE GLB NX pmd
> 0xffffffff81e00000-0xffffffff81e0b000 44K ro GLB NX pte
> 0xffffffff81e0b000-0xffffffff82000000 2004K ro NX pte
> 0xffffffff82000000-0xffffffff8214c000 1328K RW GLB NX pte
> 0xffffffff8214c000-0xffffffff8227e000 1224K RW NX pte
> 0xffffffff8227e000-0xffffffff82400000 1544K RW GLB NX pte
> 0xffffffff82400000-0xffffffff83200000 14M RW PSE GLB NX pmd
> 0xffffffff83200000-0xffffffffc0000000 974M pmd
>
> Based on work by PaX Team and Brad Spengler.
>
> Signed-off-by: Kees Cook <[email protected]>
Acked-by: Andy Lutomirski <[email protected]>
--Andy
On Wed, Dec 9, 2015 at 3:13 PM, Andy Lutomirski <[email protected]> wrote:
> On Wed, Dec 9, 2015 at 1:43 PM, Kees Cook <[email protected]> wrote:
>> The vDSO does not need to be writable after __init, so mark it as
>> __ro_after_init. The result kills the exploit method of writing to the
>> vDSO from kernel space resulting in userspace executing the modified code,
>> as shown here to bypass SMEP restrictions: http://itszn.com/blog/?p=21
>>
>> The memory map (with added vDSO address reporting) shows the vDSO moving
>> into read-only memory:
>>
>> Before:
>> [ 0.143067] vDSO @ ffffffff82004000
>> [ 0.143551] vDSO @ ffffffff82006000
>> ---[ High Kernel Mapping ]---
>> 0xffffffff80000000-0xffffffff81000000 16M pmd
>> 0xffffffff81000000-0xffffffff81800000 8M ro PSE GLB x pmd
>> 0xffffffff81800000-0xffffffff819f3000 1996K ro GLB x pte
>> 0xffffffff819f3000-0xffffffff81a00000 52K ro NX pte
>> 0xffffffff81a00000-0xffffffff81e00000 4M ro PSE GLB NX pmd
>> 0xffffffff81e00000-0xffffffff81e05000 20K ro GLB NX pte
>> 0xffffffff81e05000-0xffffffff82000000 2028K ro NX pte
>> 0xffffffff82000000-0xffffffff8214f000 1340K RW GLB NX pte
>> 0xffffffff8214f000-0xffffffff82281000 1224K RW NX pte
>> 0xffffffff82281000-0xffffffff82400000 1532K RW GLB NX pte
>> 0xffffffff82400000-0xffffffff83200000 14M RW PSE GLB NX pmd
>> 0xffffffff83200000-0xffffffffc0000000 974M pmd
>>
>> After:
>> [ 0.145062] vDSO @ ffffffff81da1000
>> [ 0.146057] vDSO @ ffffffff81da4000
>> ---[ High Kernel Mapping ]---
>> 0xffffffff80000000-0xffffffff81000000 16M pmd
>> 0xffffffff81000000-0xffffffff81800000 8M ro PSE GLB x pmd
>> 0xffffffff81800000-0xffffffff819f3000 1996K ro GLB x pte
>> 0xffffffff819f3000-0xffffffff81a00000 52K ro NX pte
>> 0xffffffff81a00000-0xffffffff81e00000 4M ro PSE GLB NX pmd
>> 0xffffffff81e00000-0xffffffff81e0b000 44K ro GLB NX pte
>> 0xffffffff81e0b000-0xffffffff82000000 2004K ro NX pte
>> 0xffffffff82000000-0xffffffff8214c000 1328K RW GLB NX pte
>> 0xffffffff8214c000-0xffffffff8227e000 1224K RW NX pte
>> 0xffffffff8227e000-0xffffffff82400000 1544K RW GLB NX pte
>> 0xffffffff82400000-0xffffffff83200000 14M RW PSE GLB NX pmd
>> 0xffffffff83200000-0xffffffffc0000000 974M pmd
>>
>> Based on work by PaX Team and Brad Spengler.
>>
>> Signed-off-by: Kees Cook <[email protected]>
>
> Acked-by: Andy Lutomirski <[email protected]>
>
FWIW, this has a minor conflict with some stuff in my queue. I
wouldn't worry about it for now.
--Andy
On Wed, Dec 9, 2015 at 11:43 PM, Kees Cook <[email protected]> wrote:
> Several places in the kernel expect to use "on" and "off" for their
> boolean signifiers, so add them to strtobool.
>
> Signed-off-by: Kees Cook <[email protected]>
> Cc: Rasmus Villemoes <[email protected]>
> Cc: Daniel Borkmann <[email protected]>
> ---
> lib/string.c | 21 ++++++++++++++++++++-
> 1 file changed, 20 insertions(+), 1 deletion(-)
>
> diff --git a/lib/string.c b/lib/string.c
> index 0323c0d5629a..d7550432f91c 100644
> --- a/lib/string.c
> +++ b/lib/string.c
> @@ -635,12 +635,16 @@ EXPORT_SYMBOL(sysfs_streq);
> * @s: input string
> * @res: result
> *
> - * This routine returns 0 iff the first character is one of 'Yy1Nn0'.
> + * This routine returns 0 iff the first character is one of 'Yy1Nn0', or
> + * 'Oo' when the second character is one of 'fFnN' (for "on" and "off").
Maybe
…or [Oo][FfNn] for "off" and "on"…
> * Otherwise it will return -EINVAL. Value pointed to by res is
> * updated upon finding a match.
> */
> int strtobool(const char *s, bool *res)
> {
> + if (!s)
> + return -EINVAL;
> +
This change I think is better to do separately. Do we have even need for it?
> switch (s[0]) {
> case 'y':
> case 'Y':
> @@ -652,6 +656,21 @@ int strtobool(const char *s, bool *res)
> case '0':
> *res = false;
> break;
> + case 'o':
> + case 'O':
> + switch (s[1]) {
> + case 'n':
> + case 'N':
> + *res = true;
> + break;
> + case 'f':
> + case 'F':
> + *res = false;
> + break;
> + default:
> + return -EINVAL;
> + }
> + break;
> default:
> return -EINVAL;
Maybe in both cases
default:
break;
}
…
}
return -EINVAL;
--
With Best Regards,
Andy Shevchenko
On Fri, Dec 11, 2015 at 9:00 AM, Andy Shevchenko
<[email protected]> wrote:
> On Wed, Dec 9, 2015 at 11:43 PM, Kees Cook <[email protected]> wrote:
>> Several places in the kernel expect to use "on" and "off" for their
>> boolean signifiers, so add them to strtobool.
>>
>> Signed-off-by: Kees Cook <[email protected]>
>> Cc: Rasmus Villemoes <[email protected]>
>> Cc: Daniel Borkmann <[email protected]>
>> ---
>> lib/string.c | 21 ++++++++++++++++++++-
>> 1 file changed, 20 insertions(+), 1 deletion(-)
>>
>> diff --git a/lib/string.c b/lib/string.c
>> index 0323c0d5629a..d7550432f91c 100644
>> --- a/lib/string.c
>> +++ b/lib/string.c
>> @@ -635,12 +635,16 @@ EXPORT_SYMBOL(sysfs_streq);
>> * @s: input string
>> * @res: result
>> *
>> - * This routine returns 0 iff the first character is one of 'Yy1Nn0'.
>> + * This routine returns 0 iff the first character is one of 'Yy1Nn0', or
>> + * 'Oo' when the second character is one of 'fFnN' (for "on" and "off").
>
> Maybe
>
> …or [Oo][FfNn] for "off" and "on"…
Sure! That's more readable.
>> * Otherwise it will return -EINVAL. Value pointed to by res is
>> * updated upon finding a match.
>> */
>> int strtobool(const char *s, bool *res)
>> {
>
>> + if (!s)
>> + return -EINVAL;
>> +
>
> This change I think is better to do separately. Do we have even need for it?
I'm happy to separate it, sure. I added it here because several of the
__setup and param callers do a check for !NULL input, and it made this
cleaner. Also it seems sensible to do this check anyway.
>> switch (s[0]) {
>> case 'y':
>> case 'Y':
>> @@ -652,6 +656,21 @@ int strtobool(const char *s, bool *res)
>> case '0':
>> *res = false;
>> break;
>> + case 'o':
>> + case 'O':
>> + switch (s[1]) {
>> + case 'n':
>> + case 'N':
>> + *res = true;
>> + break;
>> + case 'f':
>> + case 'F':
>> + *res = false;
>> + break;
>
>
>> + default:
>> + return -EINVAL;
>> + }
>> + break;
>> default:
>> return -EINVAL;
>
> Maybe in both cases
> default:
> break;
> }
> …
> }
> return -EINVAL;
I went back and forth on this. To switch to the fall-back being EINVAL
meant I had to change all the other "breaks" into "return 0", and it
just looked ugly to me. If that is preferred, though, I'm happy to do
it.
Thanks!
-Kees
--
Kees Cook
Chrome OS & Brillo Security
On Fri, Dec 11, 2015 at 8:50 PM, Kees Cook <[email protected]> wrote:
> On Fri, Dec 11, 2015 at 9:00 AM, Andy Shevchenko
> <[email protected]> wrote:
>> On Wed, Dec 9, 2015 at 11:43 PM, Kees Cook <[email protected]> wrote:
>>> Several places in the kernel expect to use "on" and "off" for their
>>> boolean signifiers, so add them to strtobool.
>>> + if (!s)
>>> + return -EINVAL;
>>> +
>>
>> This change I think is better to do separately. Do we have even need for it?
>
> I'm happy to separate it, sure. I added it here because several of the
> __setup and param callers do a check for !NULL input, and it made this
> cleaner. Also it seems sensible to do this check anyway.
OK.
>>> + default:
>>> + return -EINVAL;
>>> + }
>>> + break;
>>> default:
>>> return -EINVAL;
>>
>> Maybe in both cases
>> default:
>> break;
>> }
>> …
>> }
>> return -EINVAL;
>
> I went back and forth on this. To switch to the fall-back being EINVAL
> meant I had to change all the other "breaks" into "return 0", and it
> just looked ugly to me. If that is preferred, though, I'm happy to do
> it.
I have no strong feelings about that, I prefer whatever looks neater.
--
With Best Regards,
Andy Shevchenko