There have been multiple kernel vulnerabilities that permitted userspace to
pass completely unchecked pointers through to userspace accessors:
- the waitid() bug - commit 96ca579a1ecc ("waitid(): Add missing
access_ok() checks")
- the sg/bsg read/write APIs
- the infiniband read/write APIs
These don't happen all that often, but when they do happen, it is hard to
test for them properly; and it is probably also hard to discover them with
fuzzing. Even when an unmapped kernel address is supplied to such buggy
code, it just returns -EFAULT instead of doing a proper BUG() or at least
WARN().
This patch attempts to make such misbehaving code a bit more visible by
WARN()ing in the pagefault handler code when a userspace accessor causes
#PF on a kernel address and the current context isn't whitelisted.
Signed-off-by: Jann Horn <[email protected]>
---
This is a hacky, quickly thrown-together PoC to see what people think
about the basic idea. Does it look like a sensible change? Or would it
be better, for example, to instead expand the KASan hooks in the
usercopy logic to also look at the "user" pointer if it points to
kernelspace? That would have the advantage of also catching heap
overflows properly...
I'm not really happy with all the plumbing in my patch; I wonder whether
there's a way around introducing _ASM_EXTABLE_UA() for user accessors?
arch/x86/include/asm/asm.h | 10 ++-
arch/x86/include/asm/extable.h | 3 +-
arch/x86/include/asm/fpu/internal.h | 6 +-
arch/x86/include/asm/futex.h | 6 +-
arch/x86/include/asm/uaccess.h | 22 ++---
arch/x86/kernel/cpu/mcheck/mce.c | 2 +-
arch/x86/kernel/kprobes/core.c | 2 +-
arch/x86/kernel/traps.c | 6 +-
arch/x86/lib/checksum_32.S | 4 +-
arch/x86/lib/copy_user_64.S | 90 ++++++++++----------
arch/x86/lib/csum-copy_64.S | 6 +-
arch/x86/lib/getuser.S | 12 +--
arch/x86/lib/putuser.S | 10 +--
arch/x86/lib/usercopy_32.c | 126 ++++++++++++++--------------
arch/x86/lib/usercopy_64.c | 4 +-
arch/x86/mm/extable.c | 67 ++++++++++++---
arch/x86/mm/fault.c | 2 +-
include/linux/sched.h | 2 +
mm/maccess.c | 6 ++
19 files changed, 221 insertions(+), 165 deletions(-)
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 990770f9e76b..38f44a773adf 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -130,6 +130,9 @@
# define _ASM_EXTABLE(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
+# define _ASM_EXTABLE_UA(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
+
# define _ASM_EXTABLE_FAULT(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
@@ -165,8 +168,8 @@
jmp copy_user_handle_tail
.previous
- _ASM_EXTABLE(100b,103b)
- _ASM_EXTABLE(101b,103b)
+ _ASM_EXTABLE_UA(100b,103b)
+ _ASM_EXTABLE_UA(101b,103b)
.endm
#else
@@ -182,6 +185,9 @@
# define _ASM_EXTABLE(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
+# define _ASM_EXTABLE_UA(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
+
# define _ASM_EXTABLE_FAULT(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
diff --git a/arch/x86/include/asm/extable.h b/arch/x86/include/asm/extable.h
index f9c3a5d502f4..93c1d28f3c73 100644
--- a/arch/x86/include/asm/extable.h
+++ b/arch/x86/include/asm/extable.h
@@ -29,7 +29,8 @@ struct pt_regs;
(b)->handler = (tmp).handler - (delta); \
} while (0)
-extern int fixup_exception(struct pt_regs *regs, int trapnr);
+extern int fixup_exception(struct pt_regs *regs, int trapnr,
+ unsigned long pf_addr);
extern int fixup_bug(struct pt_regs *regs, int trapnr);
extern bool ex_has_fault_handler(unsigned long ip);
extern void early_fixup_exception(struct pt_regs *regs, int trapnr);
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index a38bf5a1e37a..640e7721138c 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -113,7 +113,7 @@ extern void fpstate_sanitize_xstate(struct fpu *fpu);
"3: movl $-1,%[err]\n" \
" jmp 2b\n" \
".previous\n" \
- _ASM_EXTABLE(1b, 3b) \
+ _ASM_EXTABLE_UA(1b, 3b) \
: [err] "=r" (err), output \
: "0"(0), input); \
err; \
@@ -226,7 +226,7 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
"3: movl $-2,%[err]\n\t" \
"jmp 2b\n\t" \
".popsection\n\t" \
- _ASM_EXTABLE(1b, 3b) \
+ _ASM_EXTABLE_UA(1b, 3b) \
: [err] "=r" (err) \
: "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
: "memory")
@@ -256,7 +256,7 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
"4: movl $-2, %[err]\n" \
"jmp 3b\n" \
".popsection\n" \
- _ASM_EXTABLE(661b, 4b) \
+ _ASM_EXTABLE_UA(661b, 4b) \
: [err] "=r" (err) \
: "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
: "memory")
diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h
index de4d68852d3a..13c83fe97988 100644
--- a/arch/x86/include/asm/futex.h
+++ b/arch/x86/include/asm/futex.h
@@ -20,7 +20,7 @@
"3:\tmov\t%3, %1\n" \
"\tjmp\t2b\n" \
"\t.previous\n" \
- _ASM_EXTABLE(1b, 3b) \
+ _ASM_EXTABLE_UA(1b, 3b) \
: "=r" (oldval), "=r" (ret), "+m" (*uaddr) \
: "i" (-EFAULT), "0" (oparg), "1" (0))
@@ -36,8 +36,8 @@
"4:\tmov\t%5, %1\n" \
"\tjmp\t3b\n" \
"\t.previous\n" \
- _ASM_EXTABLE(1b, 4b) \
- _ASM_EXTABLE(2b, 4b) \
+ _ASM_EXTABLE_UA(1b, 4b) \
+ _ASM_EXTABLE_UA(2b, 4b) \
: "=&a" (oldval), "=&r" (ret), \
"+m" (*uaddr), "=&r" (tem) \
: "r" (oparg), "i" (-EFAULT), "1" (0))
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index aae77eb8491c..b5e58cc0c5e7 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -198,8 +198,8 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
"4: movl %3,%0\n" \
" jmp 3b\n" \
".previous\n" \
- _ASM_EXTABLE(1b, 4b) \
- _ASM_EXTABLE(2b, 4b) \
+ _ASM_EXTABLE_UA(1b, 4b) \
+ _ASM_EXTABLE_UA(2b, 4b) \
: "=r" (err) \
: "A" (x), "r" (addr), "i" (errret), "0" (err))
@@ -340,8 +340,8 @@ do { \
" xorl %%edx,%%edx\n" \
" jmp 3b\n" \
".previous\n" \
- _ASM_EXTABLE(1b, 4b) \
- _ASM_EXTABLE(2b, 4b) \
+ _ASM_EXTABLE_UA(1b, 4b) \
+ _ASM_EXTABLE_UA(2b, 4b) \
: "=r" (retval), "=&A"(x) \
: "m" (__m(__ptr)), "m" __m(((u32 __user *)(__ptr)) + 1), \
"i" (errret), "0" (retval)); \
@@ -386,7 +386,7 @@ do { \
" xor"itype" %"rtype"1,%"rtype"1\n" \
" jmp 2b\n" \
".previous\n" \
- _ASM_EXTABLE(1b, 3b) \
+ _ASM_EXTABLE_UA(1b, 3b) \
: "=r" (err), ltype(x) \
: "m" (__m(addr)), "i" (errret), "0" (err))
@@ -398,7 +398,7 @@ do { \
"3: mov %3,%0\n" \
" jmp 2b\n" \
".previous\n" \
- _ASM_EXTABLE(1b, 3b) \
+ _ASM_EXTABLE_UA(1b, 3b) \
: "=r" (err), ltype(x) \
: "m" (__m(addr)), "i" (errret), "0" (err))
@@ -474,7 +474,7 @@ struct __large_struct { unsigned long buf[100]; };
"3: mov %3,%0\n" \
" jmp 2b\n" \
".previous\n" \
- _ASM_EXTABLE(1b, 3b) \
+ _ASM_EXTABLE_UA(1b, 3b) \
: "=r"(err) \
: ltype(x), "m" (__m(addr)), "i" (errret), "0" (err))
@@ -602,7 +602,7 @@ extern void __cmpxchg_wrong_size(void)
"3:\tmov %3, %0\n" \
"\tjmp 2b\n" \
"\t.previous\n" \
- _ASM_EXTABLE(1b, 3b) \
+ _ASM_EXTABLE_UA(1b, 3b) \
: "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \
: "i" (-EFAULT), "q" (__new), "1" (__old) \
: "memory" \
@@ -618,7 +618,7 @@ extern void __cmpxchg_wrong_size(void)
"3:\tmov %3, %0\n" \
"\tjmp 2b\n" \
"\t.previous\n" \
- _ASM_EXTABLE(1b, 3b) \
+ _ASM_EXTABLE_UA(1b, 3b) \
: "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \
: "i" (-EFAULT), "r" (__new), "1" (__old) \
: "memory" \
@@ -634,7 +634,7 @@ extern void __cmpxchg_wrong_size(void)
"3:\tmov %3, %0\n" \
"\tjmp 2b\n" \
"\t.previous\n" \
- _ASM_EXTABLE(1b, 3b) \
+ _ASM_EXTABLE_UA(1b, 3b) \
: "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \
: "i" (-EFAULT), "r" (__new), "1" (__old) \
: "memory" \
@@ -653,7 +653,7 @@ extern void __cmpxchg_wrong_size(void)
"3:\tmov %3, %0\n" \
"\tjmp 2b\n" \
"\t.previous\n" \
- _ASM_EXTABLE(1b, 3b) \
+ _ASM_EXTABLE_UA(1b, 3b) \
: "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \
: "i" (-EFAULT), "r" (__new), "1" (__old) \
: "memory" \
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 8c50754c09c1..e038e3f31e08 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1335,7 +1335,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
local_irq_disable();
ist_end_non_atomic();
} else {
- if (!fixup_exception(regs, X86_TRAP_MC))
+ if (!fixup_exception(regs, X86_TRAP_MC, 0))
mce_panic("Failed kernel mode recovery", &m, NULL);
}
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 6f4d42377fe5..5ad2639d3b8a 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -1044,7 +1044,7 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
* In case the user-specified fault handler returned
* zero, try to fix up.
*/
- if (fixup_exception(regs, trapnr))
+ if (fixup_exception(regs, trapnr, 0))
return 1;
/*
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index e6db475164ed..0cd11b46072a 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -206,7 +206,7 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
}
if (!user_mode(regs)) {
- if (fixup_exception(regs, trapnr))
+ if (fixup_exception(regs, trapnr, 0))
return 0;
tsk->thread.error_code = error_code;
@@ -551,7 +551,7 @@ do_general_protection(struct pt_regs *regs, long error_code)
tsk = current;
if (!user_mode(regs)) {
- if (fixup_exception(regs, X86_TRAP_GP))
+ if (fixup_exception(regs, X86_TRAP_GP, 0))
return;
tsk->thread.error_code = error_code;
@@ -838,7 +838,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
cond_local_irq_enable(regs);
if (!user_mode(regs)) {
- if (fixup_exception(regs, trapnr))
+ if (fixup_exception(regs, trapnr, 0))
return;
task->thread.error_code = error_code;
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index 46e71a74e612..ad8e0906d1ea 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -273,11 +273,11 @@ unsigned int csum_partial_copy_generic (const char *src, char *dst,
#define SRC(y...) \
9999: y; \
- _ASM_EXTABLE(9999b, 6001f)
+ _ASM_EXTABLE_UA(9999b, 6001f)
#define DST(y...) \
9999: y; \
- _ASM_EXTABLE(9999b, 6002f)
+ _ASM_EXTABLE_UA(9999b, 6002f)
#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 020f75cc8cf6..80cfad666210 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -92,26 +92,26 @@ ENTRY(copy_user_generic_unrolled)
60: jmp copy_user_handle_tail /* ecx is zerorest also */
.previous
- _ASM_EXTABLE(1b,30b)
- _ASM_EXTABLE(2b,30b)
- _ASM_EXTABLE(3b,30b)
- _ASM_EXTABLE(4b,30b)
- _ASM_EXTABLE(5b,30b)
- _ASM_EXTABLE(6b,30b)
- _ASM_EXTABLE(7b,30b)
- _ASM_EXTABLE(8b,30b)
- _ASM_EXTABLE(9b,30b)
- _ASM_EXTABLE(10b,30b)
- _ASM_EXTABLE(11b,30b)
- _ASM_EXTABLE(12b,30b)
- _ASM_EXTABLE(13b,30b)
- _ASM_EXTABLE(14b,30b)
- _ASM_EXTABLE(15b,30b)
- _ASM_EXTABLE(16b,30b)
- _ASM_EXTABLE(18b,40b)
- _ASM_EXTABLE(19b,40b)
- _ASM_EXTABLE(21b,50b)
- _ASM_EXTABLE(22b,50b)
+ _ASM_EXTABLE_UA(1b,30b)
+ _ASM_EXTABLE_UA(2b,30b)
+ _ASM_EXTABLE_UA(3b,30b)
+ _ASM_EXTABLE_UA(4b,30b)
+ _ASM_EXTABLE_UA(5b,30b)
+ _ASM_EXTABLE_UA(6b,30b)
+ _ASM_EXTABLE_UA(7b,30b)
+ _ASM_EXTABLE_UA(8b,30b)
+ _ASM_EXTABLE_UA(9b,30b)
+ _ASM_EXTABLE_UA(10b,30b)
+ _ASM_EXTABLE_UA(11b,30b)
+ _ASM_EXTABLE_UA(12b,30b)
+ _ASM_EXTABLE_UA(13b,30b)
+ _ASM_EXTABLE_UA(14b,30b)
+ _ASM_EXTABLE_UA(15b,30b)
+ _ASM_EXTABLE_UA(16b,30b)
+ _ASM_EXTABLE_UA(18b,40b)
+ _ASM_EXTABLE_UA(19b,40b)
+ _ASM_EXTABLE_UA(21b,50b)
+ _ASM_EXTABLE_UA(22b,50b)
ENDPROC(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)
@@ -156,8 +156,8 @@ ENTRY(copy_user_generic_string)
jmp copy_user_handle_tail
.previous
- _ASM_EXTABLE(1b,11b)
- _ASM_EXTABLE(3b,12b)
+ _ASM_EXTABLE_UA(1b,11b)
+ _ASM_EXTABLE_UA(3b,12b)
ENDPROC(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)
@@ -189,7 +189,7 @@ ENTRY(copy_user_enhanced_fast_string)
jmp copy_user_handle_tail
.previous
- _ASM_EXTABLE(1b,12b)
+ _ASM_EXTABLE_UA(1b,12b)
ENDPROC(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)
@@ -319,27 +319,27 @@ ENTRY(__copy_user_nocache)
jmp copy_user_handle_tail
.previous
- _ASM_EXTABLE(1b,.L_fixup_4x8b_copy)
- _ASM_EXTABLE(2b,.L_fixup_4x8b_copy)
- _ASM_EXTABLE(3b,.L_fixup_4x8b_copy)
- _ASM_EXTABLE(4b,.L_fixup_4x8b_copy)
- _ASM_EXTABLE(5b,.L_fixup_4x8b_copy)
- _ASM_EXTABLE(6b,.L_fixup_4x8b_copy)
- _ASM_EXTABLE(7b,.L_fixup_4x8b_copy)
- _ASM_EXTABLE(8b,.L_fixup_4x8b_copy)
- _ASM_EXTABLE(9b,.L_fixup_4x8b_copy)
- _ASM_EXTABLE(10b,.L_fixup_4x8b_copy)
- _ASM_EXTABLE(11b,.L_fixup_4x8b_copy)
- _ASM_EXTABLE(12b,.L_fixup_4x8b_copy)
- _ASM_EXTABLE(13b,.L_fixup_4x8b_copy)
- _ASM_EXTABLE(14b,.L_fixup_4x8b_copy)
- _ASM_EXTABLE(15b,.L_fixup_4x8b_copy)
- _ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
- _ASM_EXTABLE(20b,.L_fixup_8b_copy)
- _ASM_EXTABLE(21b,.L_fixup_8b_copy)
- _ASM_EXTABLE(30b,.L_fixup_4b_copy)
- _ASM_EXTABLE(31b,.L_fixup_4b_copy)
- _ASM_EXTABLE(40b,.L_fixup_1b_copy)
- _ASM_EXTABLE(41b,.L_fixup_1b_copy)
+ _ASM_EXTABLE_UA(1b,.L_fixup_4x8b_copy)
+ _ASM_EXTABLE_UA(2b,.L_fixup_4x8b_copy)
+ _ASM_EXTABLE_UA(3b,.L_fixup_4x8b_copy)
+ _ASM_EXTABLE_UA(4b,.L_fixup_4x8b_copy)
+ _ASM_EXTABLE_UA(5b,.L_fixup_4x8b_copy)
+ _ASM_EXTABLE_UA(6b,.L_fixup_4x8b_copy)
+ _ASM_EXTABLE_UA(7b,.L_fixup_4x8b_copy)
+ _ASM_EXTABLE_UA(8b,.L_fixup_4x8b_copy)
+ _ASM_EXTABLE_UA(9b,.L_fixup_4x8b_copy)
+ _ASM_EXTABLE_UA(10b,.L_fixup_4x8b_copy)
+ _ASM_EXTABLE_UA(11b,.L_fixup_4x8b_copy)
+ _ASM_EXTABLE_UA(12b,.L_fixup_4x8b_copy)
+ _ASM_EXTABLE_UA(13b,.L_fixup_4x8b_copy)
+ _ASM_EXTABLE_UA(14b,.L_fixup_4x8b_copy)
+ _ASM_EXTABLE_UA(15b,.L_fixup_4x8b_copy)
+ _ASM_EXTABLE_UA(16b,.L_fixup_4x8b_copy)
+ _ASM_EXTABLE_UA(20b,.L_fixup_8b_copy)
+ _ASM_EXTABLE_UA(21b,.L_fixup_8b_copy)
+ _ASM_EXTABLE_UA(30b,.L_fixup_4b_copy)
+ _ASM_EXTABLE_UA(31b,.L_fixup_4b_copy)
+ _ASM_EXTABLE_UA(40b,.L_fixup_1b_copy)
+ _ASM_EXTABLE_UA(41b,.L_fixup_1b_copy)
ENDPROC(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)
diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S
index 45a53dfe1859..969af904c74b 100644
--- a/arch/x86/lib/csum-copy_64.S
+++ b/arch/x86/lib/csum-copy_64.S
@@ -31,17 +31,17 @@
.macro source
10:
- _ASM_EXTABLE(10b, .Lbad_source)
+ _ASM_EXTABLE_UA(10b, .Lbad_source)
.endm
.macro dest
20:
- _ASM_EXTABLE(20b, .Lbad_dest)
+ _ASM_EXTABLE_UA(20b, .Lbad_dest)
.endm
.macro ignore L=.Lignore
30:
- _ASM_EXTABLE(30b, \L)
+ _ASM_EXTABLE_UA(30b, \L)
.endm
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index 49b167f73215..884fe795d9d6 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -132,12 +132,12 @@ bad_get_user_8:
END(bad_get_user_8)
#endif
- _ASM_EXTABLE(1b,bad_get_user)
- _ASM_EXTABLE(2b,bad_get_user)
- _ASM_EXTABLE(3b,bad_get_user)
+ _ASM_EXTABLE_UA(1b,bad_get_user)
+ _ASM_EXTABLE_UA(2b,bad_get_user)
+ _ASM_EXTABLE_UA(3b,bad_get_user)
#ifdef CONFIG_X86_64
- _ASM_EXTABLE(4b,bad_get_user)
+ _ASM_EXTABLE_UA(4b,bad_get_user)
#else
- _ASM_EXTABLE(4b,bad_get_user_8)
- _ASM_EXTABLE(5b,bad_get_user_8)
+ _ASM_EXTABLE_UA(4b,bad_get_user_8)
+ _ASM_EXTABLE_UA(5b,bad_get_user_8)
#endif
diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
index 96dce5fe2a35..cdcf6143d953 100644
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -94,10 +94,10 @@ bad_put_user:
EXIT
END(bad_put_user)
- _ASM_EXTABLE(1b,bad_put_user)
- _ASM_EXTABLE(2b,bad_put_user)
- _ASM_EXTABLE(3b,bad_put_user)
- _ASM_EXTABLE(4b,bad_put_user)
+ _ASM_EXTABLE_UA(1b,bad_put_user)
+ _ASM_EXTABLE_UA(2b,bad_put_user)
+ _ASM_EXTABLE_UA(3b,bad_put_user)
+ _ASM_EXTABLE_UA(4b,bad_put_user)
#ifdef CONFIG_X86_32
- _ASM_EXTABLE(5b,bad_put_user)
+ _ASM_EXTABLE_UA(5b,bad_put_user)
#endif
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 7add8ba06887..92eb5956f2f3 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -47,8 +47,8 @@ do { \
"3: lea 0(%2,%0,4),%0\n" \
" jmp 2b\n" \
".previous\n" \
- _ASM_EXTABLE(0b,3b) \
- _ASM_EXTABLE(1b,2b) \
+ _ASM_EXTABLE_UA(0b,3b) \
+ _ASM_EXTABLE_UA(1b,2b) \
: "=&c"(size), "=&D" (__d0) \
: "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \
} while (0)
@@ -153,44 +153,44 @@ __copy_user_intel(void __user *to, const void *from, unsigned long size)
"101: lea 0(%%eax,%0,4),%0\n"
" jmp 100b\n"
".previous\n"
- _ASM_EXTABLE(1b,100b)
- _ASM_EXTABLE(2b,100b)
- _ASM_EXTABLE(3b,100b)
- _ASM_EXTABLE(4b,100b)
- _ASM_EXTABLE(5b,100b)
- _ASM_EXTABLE(6b,100b)
- _ASM_EXTABLE(7b,100b)
- _ASM_EXTABLE(8b,100b)
- _ASM_EXTABLE(9b,100b)
- _ASM_EXTABLE(10b,100b)
- _ASM_EXTABLE(11b,100b)
- _ASM_EXTABLE(12b,100b)
- _ASM_EXTABLE(13b,100b)
- _ASM_EXTABLE(14b,100b)
- _ASM_EXTABLE(15b,100b)
- _ASM_EXTABLE(16b,100b)
- _ASM_EXTABLE(17b,100b)
- _ASM_EXTABLE(18b,100b)
- _ASM_EXTABLE(19b,100b)
- _ASM_EXTABLE(20b,100b)
- _ASM_EXTABLE(21b,100b)
- _ASM_EXTABLE(22b,100b)
- _ASM_EXTABLE(23b,100b)
- _ASM_EXTABLE(24b,100b)
- _ASM_EXTABLE(25b,100b)
- _ASM_EXTABLE(26b,100b)
- _ASM_EXTABLE(27b,100b)
- _ASM_EXTABLE(28b,100b)
- _ASM_EXTABLE(29b,100b)
- _ASM_EXTABLE(30b,100b)
- _ASM_EXTABLE(31b,100b)
- _ASM_EXTABLE(32b,100b)
- _ASM_EXTABLE(33b,100b)
- _ASM_EXTABLE(34b,100b)
- _ASM_EXTABLE(35b,100b)
- _ASM_EXTABLE(36b,100b)
- _ASM_EXTABLE(37b,100b)
- _ASM_EXTABLE(99b,101b)
+ _ASM_EXTABLE_UA(1b,100b)
+ _ASM_EXTABLE_UA(2b,100b)
+ _ASM_EXTABLE_UA(3b,100b)
+ _ASM_EXTABLE_UA(4b,100b)
+ _ASM_EXTABLE_UA(5b,100b)
+ _ASM_EXTABLE_UA(6b,100b)
+ _ASM_EXTABLE_UA(7b,100b)
+ _ASM_EXTABLE_UA(8b,100b)
+ _ASM_EXTABLE_UA(9b,100b)
+ _ASM_EXTABLE_UA(10b,100b)
+ _ASM_EXTABLE_UA(11b,100b)
+ _ASM_EXTABLE_UA(12b,100b)
+ _ASM_EXTABLE_UA(13b,100b)
+ _ASM_EXTABLE_UA(14b,100b)
+ _ASM_EXTABLE_UA(15b,100b)
+ _ASM_EXTABLE_UA(16b,100b)
+ _ASM_EXTABLE_UA(17b,100b)
+ _ASM_EXTABLE_UA(18b,100b)
+ _ASM_EXTABLE_UA(19b,100b)
+ _ASM_EXTABLE_UA(20b,100b)
+ _ASM_EXTABLE_UA(21b,100b)
+ _ASM_EXTABLE_UA(22b,100b)
+ _ASM_EXTABLE_UA(23b,100b)
+ _ASM_EXTABLE_UA(24b,100b)
+ _ASM_EXTABLE_UA(25b,100b)
+ _ASM_EXTABLE_UA(26b,100b)
+ _ASM_EXTABLE_UA(27b,100b)
+ _ASM_EXTABLE_UA(28b,100b)
+ _ASM_EXTABLE_UA(29b,100b)
+ _ASM_EXTABLE_UA(30b,100b)
+ _ASM_EXTABLE_UA(31b,100b)
+ _ASM_EXTABLE_UA(32b,100b)
+ _ASM_EXTABLE_UA(33b,100b)
+ _ASM_EXTABLE_UA(34b,100b)
+ _ASM_EXTABLE_UA(35b,100b)
+ _ASM_EXTABLE_UA(36b,100b)
+ _ASM_EXTABLE_UA(37b,100b)
+ _ASM_EXTABLE_UA(99b,101b)
: "=&c"(size), "=&D" (d0), "=&S" (d1)
: "1"(to), "2"(from), "0"(size)
: "eax", "edx", "memory");
@@ -259,26 +259,26 @@ static unsigned long __copy_user_intel_nocache(void *to,
"9: lea 0(%%eax,%0,4),%0\n"
"16: jmp 8b\n"
".previous\n"
- _ASM_EXTABLE(0b,16b)
- _ASM_EXTABLE(1b,16b)
- _ASM_EXTABLE(2b,16b)
- _ASM_EXTABLE(21b,16b)
- _ASM_EXTABLE(3b,16b)
- _ASM_EXTABLE(31b,16b)
- _ASM_EXTABLE(4b,16b)
- _ASM_EXTABLE(41b,16b)
- _ASM_EXTABLE(10b,16b)
- _ASM_EXTABLE(51b,16b)
- _ASM_EXTABLE(11b,16b)
- _ASM_EXTABLE(61b,16b)
- _ASM_EXTABLE(12b,16b)
- _ASM_EXTABLE(71b,16b)
- _ASM_EXTABLE(13b,16b)
- _ASM_EXTABLE(81b,16b)
- _ASM_EXTABLE(14b,16b)
- _ASM_EXTABLE(91b,16b)
- _ASM_EXTABLE(6b,9b)
- _ASM_EXTABLE(7b,16b)
+ _ASM_EXTABLE_UA(0b,16b)
+ _ASM_EXTABLE_UA(1b,16b)
+ _ASM_EXTABLE_UA(2b,16b)
+ _ASM_EXTABLE_UA(21b,16b)
+ _ASM_EXTABLE_UA(3b,16b)
+ _ASM_EXTABLE_UA(31b,16b)
+ _ASM_EXTABLE_UA(4b,16b)
+ _ASM_EXTABLE_UA(41b,16b)
+ _ASM_EXTABLE_UA(10b,16b)
+ _ASM_EXTABLE_UA(51b,16b)
+ _ASM_EXTABLE_UA(11b,16b)
+ _ASM_EXTABLE_UA(61b,16b)
+ _ASM_EXTABLE_UA(12b,16b)
+ _ASM_EXTABLE_UA(71b,16b)
+ _ASM_EXTABLE_UA(13b,16b)
+ _ASM_EXTABLE_UA(81b,16b)
+ _ASM_EXTABLE_UA(14b,16b)
+ _ASM_EXTABLE_UA(91b,16b)
+ _ASM_EXTABLE_UA(6b,9b)
+ _ASM_EXTABLE_UA(7b,16b)
: "=&c"(size), "=&D" (d0), "=&S" (d1)
: "1"(to), "2"(from), "0"(size)
: "eax", "edx", "memory");
@@ -321,9 +321,9 @@ do { \
"3: lea 0(%3,%0,4),%0\n" \
" jmp 2b\n" \
".previous\n" \
- _ASM_EXTABLE(4b,5b) \
- _ASM_EXTABLE(0b,3b) \
- _ASM_EXTABLE(1b,2b) \
+ _ASM_EXTABLE_UA(4b,5b) \
+ _ASM_EXTABLE_UA(0b,3b) \
+ _ASM_EXTABLE_UA(1b,2b) \
: "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \
: "3"(size), "0"(size), "1"(to), "2"(from) \
: "memory"); \
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 9c5606d88f61..c55b1f590cc4 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -37,8 +37,8 @@ unsigned long __clear_user(void __user *addr, unsigned long size)
"3: lea 0(%[size1],%[size8],8),%[size8]\n"
" jmp 2b\n"
".previous\n"
- _ASM_EXTABLE(0b,3b)
- _ASM_EXTABLE(1b,2b)
+ _ASM_EXTABLE_UA(0b,3b)
+ _ASM_EXTABLE_UA(1b,2b)
: [size8] "=&c"(size), [dst] "=&D" (__d0)
: [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr));
clac();
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 45f5d6cf65ae..3fd55a03892d 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -8,7 +8,7 @@
#include <asm/kdebug.h>
typedef bool (*ex_handler_t)(const struct exception_table_entry *,
- struct pt_regs *, int);
+ struct pt_regs *, int, unsigned long);
static inline unsigned long
ex_fixup_addr(const struct exception_table_entry *x)
@@ -22,7 +22,8 @@ ex_fixup_handler(const struct exception_table_entry *x)
}
__visible bool ex_handler_default(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr)
+ struct pt_regs *regs, int trapnr,
+ unsigned long fault_addr)
{
regs->ip = ex_fixup_addr(fixup);
return true;
@@ -30,7 +31,8 @@ __visible bool ex_handler_default(const struct exception_table_entry *fixup,
EXPORT_SYMBOL(ex_handler_default);
__visible bool ex_handler_fault(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr)
+ struct pt_regs *regs, int trapnr,
+ unsigned long fault_addr)
{
regs->ip = ex_fixup_addr(fixup);
regs->ax = trapnr;
@@ -43,7 +45,8 @@ EXPORT_SYMBOL_GPL(ex_handler_fault);
* result of a refcount inc/dec/add/sub.
*/
__visible bool ex_handler_refcount(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr)
+ struct pt_regs *regs, int trapnr,
+ unsigned long fault_addr)
{
/* First unconditionally saturate the refcount. */
*(int *)regs->cx = INT_MIN / 2;
@@ -96,7 +99,8 @@ EXPORT_SYMBOL(ex_handler_refcount);
* out all the FPU registers) if we can't restore from the task's FPU state.
*/
__visible bool ex_handler_fprestore(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr)
+ struct pt_regs *regs, int trapnr,
+ unsigned long fault_addr)
{
regs->ip = ex_fixup_addr(fixup);
@@ -108,18 +112,53 @@ __visible bool ex_handler_fprestore(const struct exception_table_entry *fixup,
}
EXPORT_SYMBOL_GPL(ex_handler_fprestore);
+static void bogus_uaccess_check(struct pt_regs *regs, int trapnr,
+ unsigned long fault_addr)
+{
+ if (trapnr != X86_TRAP_PF || fault_addr < TASK_SIZE_MAX)
+ return;
+ /*
+ * This is a pagefault in kernel space, on a kernel address, in a
+ * usercopy function. This can e.g. be caused by improper use of helpers
+ * like __put_user and by improper attempts to access userspace
+ * addresses in KERNEL_DS regions. The one legitimate exception are
+ * probe_kernel_{read,write}(), which can be invoked from places like
+ * kgdb, /dev/mem (for reading) and privileged BPF code (for reading).
+ * The probe_kernel_*() functions set the kernel_uaccess_faults_ok flag
+ * to tell us that faulting on kernel addresses in a userspace accessor
+ * is fine.
+ */
+ if (current->kernel_uaccess_faults_ok)
+ return;
+ WARN(1, "pagefault on kernel address 0x%lx in non-whitelisted uaccess",
+ fault_addr);
+}
+
+__visible bool ex_handler_uaccess(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr,
+ unsigned long fault_addr)
+{
+ bogus_uaccess_check(regs, trapnr, fault_addr);
+ regs->ip = ex_fixup_addr(fixup);
+ return true;
+}
+EXPORT_SYMBOL(ex_handler_uaccess);
+
__visible bool ex_handler_ext(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr)
+ struct pt_regs *regs, int trapnr,
+ unsigned long fault_addr)
{
/* Special hack for uaccess_err */
current->thread.uaccess_err = 1;
+ bogus_uaccess_check(regs, trapnr, fault_addr);
regs->ip = ex_fixup_addr(fixup);
return true;
}
EXPORT_SYMBOL(ex_handler_ext);
__visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr)
+ struct pt_regs *regs, int trapnr,
+ unsigned long fault_addr)
{
if (pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pF)\n",
(unsigned int)regs->cx, regs->ip, (void *)regs->ip))
@@ -134,7 +173,8 @@ __visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup
EXPORT_SYMBOL(ex_handler_rdmsr_unsafe);
__visible bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr)
+ struct pt_regs *regs, int trapnr,
+ unsigned long fault_addr)
{
if (pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pF)\n",
(unsigned int)regs->cx, (unsigned int)regs->dx,
@@ -148,12 +188,13 @@ __visible bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup
EXPORT_SYMBOL(ex_handler_wrmsr_unsafe);
__visible bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr)
+ struct pt_regs *regs, int trapnr,
+ unsigned long fault_addr)
{
if (static_cpu_has(X86_BUG_NULL_SEG))
asm volatile ("mov %0, %%fs" : : "rm" (__USER_DS));
asm volatile ("mov %0, %%fs" : : "rm" (0));
- return ex_handler_default(fixup, regs, trapnr);
+ return ex_handler_default(fixup, regs, trapnr, fault_addr);
}
EXPORT_SYMBOL(ex_handler_clear_fs);
@@ -170,7 +211,7 @@ __visible bool ex_has_fault_handler(unsigned long ip)
return handler == ex_handler_fault;
}
-int fixup_exception(struct pt_regs *regs, int trapnr)
+int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long fault_addr)
{
const struct exception_table_entry *e;
ex_handler_t handler;
@@ -194,7 +235,7 @@ int fixup_exception(struct pt_regs *regs, int trapnr)
return 0;
handler = ex_fixup_handler(e);
- return handler(e, regs, trapnr);
+ return handler(e, regs, trapnr, fault_addr);
}
extern unsigned int early_recursion_flag;
@@ -232,7 +273,7 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr)
* Keep in mind that not all vectors actually get here. Early
* fage faults, for example, are special.
*/
- if (fixup_exception(regs, trapnr))
+ if (fixup_exception(regs, trapnr, 0))
return;
if (fixup_bug(regs, trapnr))
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 2aafa6ab6103..96e3e5cf2cfc 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -710,7 +710,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
int sig;
/* Are we prepared to handle this kernel fault? */
- if (fixup_exception(regs, X86_TRAP_PF)) {
+ if (fixup_exception(regs, X86_TRAP_PF, address)) {
/*
* Any interrupt that takes a fault gets the fixup. This makes
* the below recursive fault logic only apply to a faults from
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 43731fe51c97..b50598761ed4 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -734,6 +734,8 @@ struct task_struct {
/* disallow userland-initiated cgroup migration */
unsigned no_cgroup_migration:1;
#endif
+ /* usercopy functions may fault on kernel addresses */
+ unsigned int kernel_uaccess_faults_ok:1;
unsigned long atomic_flags; /* Flags requiring atomic access. */
diff --git a/mm/maccess.c b/mm/maccess.c
index ec00be51a24f..e066aa8482af 100644
--- a/mm/maccess.c
+++ b/mm/maccess.c
@@ -30,8 +30,10 @@ long __probe_kernel_read(void *dst, const void *src, size_t size)
set_fs(KERNEL_DS);
pagefault_disable();
+ current->kernel_uaccess_faults_ok = 1;
ret = __copy_from_user_inatomic(dst,
(__force const void __user *)src, size);
+ current->kernel_uaccess_faults_ok = 0;
pagefault_enable();
set_fs(old_fs);
@@ -58,7 +60,9 @@ long __probe_kernel_write(void *dst, const void *src, size_t size)
set_fs(KERNEL_DS);
pagefault_disable();
+ current->kernel_uaccess_faults_ok = 1;
ret = __copy_to_user_inatomic((__force void __user *)dst, src, size);
+ current->kernel_uaccess_faults_ok = 0;
pagefault_enable();
set_fs(old_fs);
@@ -94,11 +98,13 @@ long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count)
set_fs(KERNEL_DS);
pagefault_disable();
+ current->kernel_uaccess_faults_ok = 1;
do {
ret = __get_user(*dst++, (const char __user __force *)src++);
} while (dst[-1] && ret == 0 && src - unsafe_addr < count);
+ current->kernel_uaccess_faults_ok = 0;
dst[-1] = '\0';
pagefault_enable();
set_fs(old_fs);
--
2.18.0.597.ga71716f1ad-goog
Test whether the kernel WARN()s when, under KERNEL_DS, a bad kernel pointer
is used as "userspace" pointer. Test with "DIRECT" mode.
Signed-off-by: Jann Horn <[email protected]>
---
drivers/misc/lkdtm/core.c | 1 +
drivers/misc/lkdtm/lkdtm.h | 1 +
drivers/misc/lkdtm/usercopy.c | 13 +++++++++++++
3 files changed, 15 insertions(+)
diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c
index 2154d1bfd18b..5a755590d3dc 100644
--- a/drivers/misc/lkdtm/core.c
+++ b/drivers/misc/lkdtm/core.c
@@ -183,6 +183,7 @@ static const struct crashtype crashtypes[] = {
CRASHTYPE(USERCOPY_STACK_FRAME_FROM),
CRASHTYPE(USERCOPY_STACK_BEYOND),
CRASHTYPE(USERCOPY_KERNEL),
+ CRASHTYPE(USERCOPY_KERNEL_DS),
};
diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h
index 9e513dcfd809..07db641d71d0 100644
--- a/drivers/misc/lkdtm/lkdtm.h
+++ b/drivers/misc/lkdtm/lkdtm.h
@@ -82,5 +82,6 @@ void lkdtm_USERCOPY_STACK_FRAME_TO(void);
void lkdtm_USERCOPY_STACK_FRAME_FROM(void);
void lkdtm_USERCOPY_STACK_BEYOND(void);
void lkdtm_USERCOPY_KERNEL(void);
+void lkdtm_USERCOPY_KERNEL_DS(void);
#endif
diff --git a/drivers/misc/lkdtm/usercopy.c b/drivers/misc/lkdtm/usercopy.c
index 9725aed305bb..389475b25bb7 100644
--- a/drivers/misc/lkdtm/usercopy.c
+++ b/drivers/misc/lkdtm/usercopy.c
@@ -322,6 +322,19 @@ void lkdtm_USERCOPY_KERNEL(void)
vm_munmap(user_addr, PAGE_SIZE);
}
+void lkdtm_USERCOPY_KERNEL_DS(void)
+{
+ char __user *user_ptr = (char __user *)ERR_PTR(-EINVAL);
+ mm_segment_t old_fs = get_fs();
+ char buf[10] = {0};
+
+ pr_info("attempting copy_to_user on unmapped kernel address\n");
+ set_fs(KERNEL_DS);
+ if (copy_to_user(user_ptr, buf, sizeof(buf)))
+ pr_info("copy_to_user un unmapped kernel address failed\n");
+ set_fs(old_fs);
+}
+
void __init lkdtm_usercopy_init(void)
{
/* Prepare cache that lacks SLAB_USERCOPY flag. */
--
2.18.0.597.ga71716f1ad-goog
> On Aug 6, 2018, at 6:22 PM, Jann Horn <[email protected]> wrote:
>
> There have been multiple kernel vulnerabilities that permitted userspace to
> pass completely unchecked pointers through to userspace accessors:
>
> - the waitid() bug - commit 96ca579a1ecc ("waitid(): Add missing
> access_ok() checks")
> - the sg/bsg read/write APIs
> - the infiniband read/write APIs
>
> These don't happen all that often, but when they do happen, it is hard to
> test for them properly; and it is probably also hard to discover them with
> fuzzing. Even when an unmapped kernel address is supplied to such buggy
> code, it just returns -EFAULT instead of doing a proper BUG() or at least
> WARN().
>
> This patch attempts to make such misbehaving code a bit more visible by
> WARN()ing in the pagefault handler code when a userspace accessor causes
> #PF on a kernel address and the current context isn't whitelisted.
I like this a lot, and, in fact, I once wrote a patch to do something similar. It was before the fancy extable code, though, so it was a mess. Here are some thoughts:
- It should be three patches. One patch to add the _UA annotations, one to improve the info passes to the handlers, and one to change behavior.
- You should pass the vector, the error code, and the address to the handler.
- The uaccess handler should IMO WARN if the vector is anything other than #PF (which mainly means warning if it’s #GP). I think it should pr_emerg() and return false if the vector is #PF and the address is too high.
- Arguably most non-uaccess fixups should at least warn for anything other than #GP and #UD.
>
> Signed-off-by: Jann Horn <[email protected]>
> ---
> This is a hacky, quickly thrown-together PoC to see what people think
> about the basic idea. Does it look like a sensible change? Or would it
> be better, for example, to instead expand the KASan hooks in the
> usercopy logic to also look at the "user" pointer if it points to
> kernelspace? That would have the advantage of also catching heap
> overflows properly...
>
> I'm not really happy with all the plumbing in my patch; I wonder whether
> there's a way around introducing _ASM_EXTABLE_UA() for user accessors?
>
> arch/x86/include/asm/asm.h | 10 ++-
> arch/x86/include/asm/extable.h | 3 +-
> arch/x86/include/asm/fpu/internal.h | 6 +-
> arch/x86/include/asm/futex.h | 6 +-
> arch/x86/include/asm/uaccess.h | 22 ++---
> arch/x86/kernel/cpu/mcheck/mce.c | 2 +-
> arch/x86/kernel/kprobes/core.c | 2 +-
> arch/x86/kernel/traps.c | 6 +-
> arch/x86/lib/checksum_32.S | 4 +-
> arch/x86/lib/copy_user_64.S | 90 ++++++++++----------
> arch/x86/lib/csum-copy_64.S | 6 +-
> arch/x86/lib/getuser.S | 12 +--
> arch/x86/lib/putuser.S | 10 +--
> arch/x86/lib/usercopy_32.c | 126 ++++++++++++++--------------
> arch/x86/lib/usercopy_64.c | 4 +-
> arch/x86/mm/extable.c | 67 ++++++++++++---
> arch/x86/mm/fault.c | 2 +-
> include/linux/sched.h | 2 +
> mm/maccess.c | 6 ++
> 19 files changed, 221 insertions(+), 165 deletions(-)
>
> diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
> index 990770f9e76b..38f44a773adf 100644
> --- a/arch/x86/include/asm/asm.h
> +++ b/arch/x86/include/asm/asm.h
> @@ -130,6 +130,9 @@
> # define _ASM_EXTABLE(from, to) \
> _ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
>
> +# define _ASM_EXTABLE_UA(from, to) \
> + _ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
> +
> # define _ASM_EXTABLE_FAULT(from, to) \
> _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
>
> @@ -165,8 +168,8 @@
> jmp copy_user_handle_tail
> .previous
>
> - _ASM_EXTABLE(100b,103b)
> - _ASM_EXTABLE(101b,103b)
> + _ASM_EXTABLE_UA(100b,103b)
> + _ASM_EXTABLE_UA(101b,103b)
> .endm
>
> #else
> @@ -182,6 +185,9 @@
> # define _ASM_EXTABLE(from, to) \
> _ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
>
> +# define _ASM_EXTABLE_UA(from, to) \
> + _ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
> +
> # define _ASM_EXTABLE_FAULT(from, to) \
> _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
>
> diff --git a/arch/x86/include/asm/extable.h b/arch/x86/include/asm/extable.h
> index f9c3a5d502f4..93c1d28f3c73 100644
> --- a/arch/x86/include/asm/extable.h
> +++ b/arch/x86/include/asm/extable.h
> @@ -29,7 +29,8 @@ struct pt_regs;
> (b)->handler = (tmp).handler - (delta); \
> } while (0)
>
> -extern int fixup_exception(struct pt_regs *regs, int trapnr);
> +extern int fixup_exception(struct pt_regs *regs, int trapnr,
> + unsigned long pf_addr);
> extern int fixup_bug(struct pt_regs *regs, int trapnr);
> extern bool ex_has_fault_handler(unsigned long ip);
> extern void early_fixup_exception(struct pt_regs *regs, int trapnr);
> diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
> index a38bf5a1e37a..640e7721138c 100644
> --- a/arch/x86/include/asm/fpu/internal.h
> +++ b/arch/x86/include/asm/fpu/internal.h
> @@ -113,7 +113,7 @@ extern void fpstate_sanitize_xstate(struct fpu *fpu);
> "3: movl $-1,%[err]\n" \
> " jmp 2b\n" \
> ".previous\n" \
> - _ASM_EXTABLE(1b, 3b) \
> + _ASM_EXTABLE_UA(1b, 3b) \
> : [err] "=r" (err), output \
> : "0"(0), input); \
> err; \
> @@ -226,7 +226,7 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
> "3: movl $-2,%[err]\n\t" \
> "jmp 2b\n\t" \
> ".popsection\n\t" \
> - _ASM_EXTABLE(1b, 3b) \
> + _ASM_EXTABLE_UA(1b, 3b) \
> : [err] "=r" (err) \
> : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
> : "memory")
> @@ -256,7 +256,7 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
> "4: movl $-2, %[err]\n" \
> "jmp 3b\n" \
> ".popsection\n" \
> - _ASM_EXTABLE(661b, 4b) \
> + _ASM_EXTABLE_UA(661b, 4b) \
> : [err] "=r" (err) \
> : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
> : "memory")
> diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h
> index de4d68852d3a..13c83fe97988 100644
> --- a/arch/x86/include/asm/futex.h
> +++ b/arch/x86/include/asm/futex.h
> @@ -20,7 +20,7 @@
> "3:\tmov\t%3, %1\n" \
> "\tjmp\t2b\n" \
> "\t.previous\n" \
> - _ASM_EXTABLE(1b, 3b) \
> + _ASM_EXTABLE_UA(1b, 3b) \
> : "=r" (oldval), "=r" (ret), "+m" (*uaddr) \
> : "i" (-EFAULT), "0" (oparg), "1" (0))
>
> @@ -36,8 +36,8 @@
> "4:\tmov\t%5, %1\n" \
> "\tjmp\t3b\n" \
> "\t.previous\n" \
> - _ASM_EXTABLE(1b, 4b) \
> - _ASM_EXTABLE(2b, 4b) \
> + _ASM_EXTABLE_UA(1b, 4b) \
> + _ASM_EXTABLE_UA(2b, 4b) \
> : "=&a" (oldval), "=&r" (ret), \
> "+m" (*uaddr), "=&r" (tem) \
> : "r" (oparg), "i" (-EFAULT), "1" (0))
> diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
> index aae77eb8491c..b5e58cc0c5e7 100644
> --- a/arch/x86/include/asm/uaccess.h
> +++ b/arch/x86/include/asm/uaccess.h
> @@ -198,8 +198,8 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
> "4: movl %3,%0\n" \
> " jmp 3b\n" \
> ".previous\n" \
> - _ASM_EXTABLE(1b, 4b) \
> - _ASM_EXTABLE(2b, 4b) \
> + _ASM_EXTABLE_UA(1b, 4b) \
> + _ASM_EXTABLE_UA(2b, 4b) \
> : "=r" (err) \
> : "A" (x), "r" (addr), "i" (errret), "0" (err))
>
> @@ -340,8 +340,8 @@ do { \
> " xorl %%edx,%%edx\n" \
> " jmp 3b\n" \
> ".previous\n" \
> - _ASM_EXTABLE(1b, 4b) \
> - _ASM_EXTABLE(2b, 4b) \
> + _ASM_EXTABLE_UA(1b, 4b) \
> + _ASM_EXTABLE_UA(2b, 4b) \
> : "=r" (retval), "=&A"(x) \
> : "m" (__m(__ptr)), "m" __m(((u32 __user *)(__ptr)) + 1), \
> "i" (errret), "0" (retval)); \
> @@ -386,7 +386,7 @@ do { \
> " xor"itype" %"rtype"1,%"rtype"1\n" \
> " jmp 2b\n" \
> ".previous\n" \
> - _ASM_EXTABLE(1b, 3b) \
> + _ASM_EXTABLE_UA(1b, 3b) \
> : "=r" (err), ltype(x) \
> : "m" (__m(addr)), "i" (errret), "0" (err))
>
> @@ -398,7 +398,7 @@ do { \
> "3: mov %3,%0\n" \
> " jmp 2b\n" \
> ".previous\n" \
> - _ASM_EXTABLE(1b, 3b) \
> + _ASM_EXTABLE_UA(1b, 3b) \
> : "=r" (err), ltype(x) \
> : "m" (__m(addr)), "i" (errret), "0" (err))
>
> @@ -474,7 +474,7 @@ struct __large_struct { unsigned long buf[100]; };
> "3: mov %3,%0\n" \
> " jmp 2b\n" \
> ".previous\n" \
> - _ASM_EXTABLE(1b, 3b) \
> + _ASM_EXTABLE_UA(1b, 3b) \
> : "=r"(err) \
> : ltype(x), "m" (__m(addr)), "i" (errret), "0" (err))
>
> @@ -602,7 +602,7 @@ extern void __cmpxchg_wrong_size(void)
> "3:\tmov %3, %0\n" \
> "\tjmp 2b\n" \
> "\t.previous\n" \
> - _ASM_EXTABLE(1b, 3b) \
> + _ASM_EXTABLE_UA(1b, 3b) \
> : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \
> : "i" (-EFAULT), "q" (__new), "1" (__old) \
> : "memory" \
> @@ -618,7 +618,7 @@ extern void __cmpxchg_wrong_size(void)
> "3:\tmov %3, %0\n" \
> "\tjmp 2b\n" \
> "\t.previous\n" \
> - _ASM_EXTABLE(1b, 3b) \
> + _ASM_EXTABLE_UA(1b, 3b) \
> : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \
> : "i" (-EFAULT), "r" (__new), "1" (__old) \
> : "memory" \
> @@ -634,7 +634,7 @@ extern void __cmpxchg_wrong_size(void)
> "3:\tmov %3, %0\n" \
> "\tjmp 2b\n" \
> "\t.previous\n" \
> - _ASM_EXTABLE(1b, 3b) \
> + _ASM_EXTABLE_UA(1b, 3b) \
> : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \
> : "i" (-EFAULT), "r" (__new), "1" (__old) \
> : "memory" \
> @@ -653,7 +653,7 @@ extern void __cmpxchg_wrong_size(void)
> "3:\tmov %3, %0\n" \
> "\tjmp 2b\n" \
> "\t.previous\n" \
> - _ASM_EXTABLE(1b, 3b) \
> + _ASM_EXTABLE_UA(1b, 3b) \
> : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \
> : "i" (-EFAULT), "r" (__new), "1" (__old) \
> : "memory" \
> diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
> index 8c50754c09c1..e038e3f31e08 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce.c
> @@ -1335,7 +1335,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
> local_irq_disable();
> ist_end_non_atomic();
> } else {
> - if (!fixup_exception(regs, X86_TRAP_MC))
> + if (!fixup_exception(regs, X86_TRAP_MC, 0))
> mce_panic("Failed kernel mode recovery", &m, NULL);
> }
>
> diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
> index 6f4d42377fe5..5ad2639d3b8a 100644
> --- a/arch/x86/kernel/kprobes/core.c
> +++ b/arch/x86/kernel/kprobes/core.c
> @@ -1044,7 +1044,7 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
> * In case the user-specified fault handler returned
> * zero, try to fix up.
> */
> - if (fixup_exception(regs, trapnr))
> + if (fixup_exception(regs, trapnr, 0))
> return 1;
>
> /*
> diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
> index e6db475164ed..0cd11b46072a 100644
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -206,7 +206,7 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
> }
>
> if (!user_mode(regs)) {
> - if (fixup_exception(regs, trapnr))
> + if (fixup_exception(regs, trapnr, 0))
> return 0;
>
> tsk->thread.error_code = error_code;
> @@ -551,7 +551,7 @@ do_general_protection(struct pt_regs *regs, long error_code)
>
> tsk = current;
> if (!user_mode(regs)) {
> - if (fixup_exception(regs, X86_TRAP_GP))
> + if (fixup_exception(regs, X86_TRAP_GP, 0))
> return;
>
> tsk->thread.error_code = error_code;
> @@ -838,7 +838,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
> cond_local_irq_enable(regs);
>
> if (!user_mode(regs)) {
> - if (fixup_exception(regs, trapnr))
> + if (fixup_exception(regs, trapnr, 0))
> return;
>
> task->thread.error_code = error_code;
> diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
> index 46e71a74e612..ad8e0906d1ea 100644
> --- a/arch/x86/lib/checksum_32.S
> +++ b/arch/x86/lib/checksum_32.S
> @@ -273,11 +273,11 @@ unsigned int csum_partial_copy_generic (const char *src, char *dst,
>
> #define SRC(y...) \
> 9999: y; \
> - _ASM_EXTABLE(9999b, 6001f)
> + _ASM_EXTABLE_UA(9999b, 6001f)
>
> #define DST(y...) \
> 9999: y; \
> - _ASM_EXTABLE(9999b, 6002f)
> + _ASM_EXTABLE_UA(9999b, 6002f)
>
> #ifndef CONFIG_X86_USE_PPRO_CHECKSUM
>
> diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
> index 020f75cc8cf6..80cfad666210 100644
> --- a/arch/x86/lib/copy_user_64.S
> +++ b/arch/x86/lib/copy_user_64.S
> @@ -92,26 +92,26 @@ ENTRY(copy_user_generic_unrolled)
> 60: jmp copy_user_handle_tail /* ecx is zerorest also */
> .previous
>
> - _ASM_EXTABLE(1b,30b)
> - _ASM_EXTABLE(2b,30b)
> - _ASM_EXTABLE(3b,30b)
> - _ASM_EXTABLE(4b,30b)
> - _ASM_EXTABLE(5b,30b)
> - _ASM_EXTABLE(6b,30b)
> - _ASM_EXTABLE(7b,30b)
> - _ASM_EXTABLE(8b,30b)
> - _ASM_EXTABLE(9b,30b)
> - _ASM_EXTABLE(10b,30b)
> - _ASM_EXTABLE(11b,30b)
> - _ASM_EXTABLE(12b,30b)
> - _ASM_EXTABLE(13b,30b)
> - _ASM_EXTABLE(14b,30b)
> - _ASM_EXTABLE(15b,30b)
> - _ASM_EXTABLE(16b,30b)
> - _ASM_EXTABLE(18b,40b)
> - _ASM_EXTABLE(19b,40b)
> - _ASM_EXTABLE(21b,50b)
> - _ASM_EXTABLE(22b,50b)
> + _ASM_EXTABLE_UA(1b,30b)
> + _ASM_EXTABLE_UA(2b,30b)
> + _ASM_EXTABLE_UA(3b,30b)
> + _ASM_EXTABLE_UA(4b,30b)
> + _ASM_EXTABLE_UA(5b,30b)
> + _ASM_EXTABLE_UA(6b,30b)
> + _ASM_EXTABLE_UA(7b,30b)
> + _ASM_EXTABLE_UA(8b,30b)
> + _ASM_EXTABLE_UA(9b,30b)
> + _ASM_EXTABLE_UA(10b,30b)
> + _ASM_EXTABLE_UA(11b,30b)
> + _ASM_EXTABLE_UA(12b,30b)
> + _ASM_EXTABLE_UA(13b,30b)
> + _ASM_EXTABLE_UA(14b,30b)
> + _ASM_EXTABLE_UA(15b,30b)
> + _ASM_EXTABLE_UA(16b,30b)
> + _ASM_EXTABLE_UA(18b,40b)
> + _ASM_EXTABLE_UA(19b,40b)
> + _ASM_EXTABLE_UA(21b,50b)
> + _ASM_EXTABLE_UA(22b,50b)
> ENDPROC(copy_user_generic_unrolled)
> EXPORT_SYMBOL(copy_user_generic_unrolled)
>
> @@ -156,8 +156,8 @@ ENTRY(copy_user_generic_string)
> jmp copy_user_handle_tail
> .previous
>
> - _ASM_EXTABLE(1b,11b)
> - _ASM_EXTABLE(3b,12b)
> + _ASM_EXTABLE_UA(1b,11b)
> + _ASM_EXTABLE_UA(3b,12b)
> ENDPROC(copy_user_generic_string)
> EXPORT_SYMBOL(copy_user_generic_string)
>
> @@ -189,7 +189,7 @@ ENTRY(copy_user_enhanced_fast_string)
> jmp copy_user_handle_tail
> .previous
>
> - _ASM_EXTABLE(1b,12b)
> + _ASM_EXTABLE_UA(1b,12b)
> ENDPROC(copy_user_enhanced_fast_string)
> EXPORT_SYMBOL(copy_user_enhanced_fast_string)
>
> @@ -319,27 +319,27 @@ ENTRY(__copy_user_nocache)
> jmp copy_user_handle_tail
> .previous
>
> - _ASM_EXTABLE(1b,.L_fixup_4x8b_copy)
> - _ASM_EXTABLE(2b,.L_fixup_4x8b_copy)
> - _ASM_EXTABLE(3b,.L_fixup_4x8b_copy)
> - _ASM_EXTABLE(4b,.L_fixup_4x8b_copy)
> - _ASM_EXTABLE(5b,.L_fixup_4x8b_copy)
> - _ASM_EXTABLE(6b,.L_fixup_4x8b_copy)
> - _ASM_EXTABLE(7b,.L_fixup_4x8b_copy)
> - _ASM_EXTABLE(8b,.L_fixup_4x8b_copy)
> - _ASM_EXTABLE(9b,.L_fixup_4x8b_copy)
> - _ASM_EXTABLE(10b,.L_fixup_4x8b_copy)
> - _ASM_EXTABLE(11b,.L_fixup_4x8b_copy)
> - _ASM_EXTABLE(12b,.L_fixup_4x8b_copy)
> - _ASM_EXTABLE(13b,.L_fixup_4x8b_copy)
> - _ASM_EXTABLE(14b,.L_fixup_4x8b_copy)
> - _ASM_EXTABLE(15b,.L_fixup_4x8b_copy)
> - _ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
> - _ASM_EXTABLE(20b,.L_fixup_8b_copy)
> - _ASM_EXTABLE(21b,.L_fixup_8b_copy)
> - _ASM_EXTABLE(30b,.L_fixup_4b_copy)
> - _ASM_EXTABLE(31b,.L_fixup_4b_copy)
> - _ASM_EXTABLE(40b,.L_fixup_1b_copy)
> - _ASM_EXTABLE(41b,.L_fixup_1b_copy)
> + _ASM_EXTABLE_UA(1b,.L_fixup_4x8b_copy)
> + _ASM_EXTABLE_UA(2b,.L_fixup_4x8b_copy)
> + _ASM_EXTABLE_UA(3b,.L_fixup_4x8b_copy)
> + _ASM_EXTABLE_UA(4b,.L_fixup_4x8b_copy)
> + _ASM_EXTABLE_UA(5b,.L_fixup_4x8b_copy)
> + _ASM_EXTABLE_UA(6b,.L_fixup_4x8b_copy)
> + _ASM_EXTABLE_UA(7b,.L_fixup_4x8b_copy)
> + _ASM_EXTABLE_UA(8b,.L_fixup_4x8b_copy)
> + _ASM_EXTABLE_UA(9b,.L_fixup_4x8b_copy)
> + _ASM_EXTABLE_UA(10b,.L_fixup_4x8b_copy)
> + _ASM_EXTABLE_UA(11b,.L_fixup_4x8b_copy)
> + _ASM_EXTABLE_UA(12b,.L_fixup_4x8b_copy)
> + _ASM_EXTABLE_UA(13b,.L_fixup_4x8b_copy)
> + _ASM_EXTABLE_UA(14b,.L_fixup_4x8b_copy)
> + _ASM_EXTABLE_UA(15b,.L_fixup_4x8b_copy)
> + _ASM_EXTABLE_UA(16b,.L_fixup_4x8b_copy)
> + _ASM_EXTABLE_UA(20b,.L_fixup_8b_copy)
> + _ASM_EXTABLE_UA(21b,.L_fixup_8b_copy)
> + _ASM_EXTABLE_UA(30b,.L_fixup_4b_copy)
> + _ASM_EXTABLE_UA(31b,.L_fixup_4b_copy)
> + _ASM_EXTABLE_UA(40b,.L_fixup_1b_copy)
> + _ASM_EXTABLE_UA(41b,.L_fixup_1b_copy)
> ENDPROC(__copy_user_nocache)
> EXPORT_SYMBOL(__copy_user_nocache)
> diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S
> index 45a53dfe1859..969af904c74b 100644
> --- a/arch/x86/lib/csum-copy_64.S
> +++ b/arch/x86/lib/csum-copy_64.S
> @@ -31,17 +31,17 @@
>
> .macro source
> 10:
> - _ASM_EXTABLE(10b, .Lbad_source)
> + _ASM_EXTABLE_UA(10b, .Lbad_source)
> .endm
>
> .macro dest
> 20:
> - _ASM_EXTABLE(20b, .Lbad_dest)
> + _ASM_EXTABLE_UA(20b, .Lbad_dest)
> .endm
>
> .macro ignore L=.Lignore
> 30:
> - _ASM_EXTABLE(30b, \L)
> + _ASM_EXTABLE_UA(30b, \L)
> .endm
>
>
> diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
> index 49b167f73215..884fe795d9d6 100644
> --- a/arch/x86/lib/getuser.S
> +++ b/arch/x86/lib/getuser.S
> @@ -132,12 +132,12 @@ bad_get_user_8:
> END(bad_get_user_8)
> #endif
>
> - _ASM_EXTABLE(1b,bad_get_user)
> - _ASM_EXTABLE(2b,bad_get_user)
> - _ASM_EXTABLE(3b,bad_get_user)
> + _ASM_EXTABLE_UA(1b,bad_get_user)
> + _ASM_EXTABLE_UA(2b,bad_get_user)
> + _ASM_EXTABLE_UA(3b,bad_get_user)
> #ifdef CONFIG_X86_64
> - _ASM_EXTABLE(4b,bad_get_user)
> + _ASM_EXTABLE_UA(4b,bad_get_user)
> #else
> - _ASM_EXTABLE(4b,bad_get_user_8)
> - _ASM_EXTABLE(5b,bad_get_user_8)
> + _ASM_EXTABLE_UA(4b,bad_get_user_8)
> + _ASM_EXTABLE_UA(5b,bad_get_user_8)
> #endif
> diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
> index 96dce5fe2a35..cdcf6143d953 100644
> --- a/arch/x86/lib/putuser.S
> +++ b/arch/x86/lib/putuser.S
> @@ -94,10 +94,10 @@ bad_put_user:
> EXIT
> END(bad_put_user)
>
> - _ASM_EXTABLE(1b,bad_put_user)
> - _ASM_EXTABLE(2b,bad_put_user)
> - _ASM_EXTABLE(3b,bad_put_user)
> - _ASM_EXTABLE(4b,bad_put_user)
> + _ASM_EXTABLE_UA(1b,bad_put_user)
> + _ASM_EXTABLE_UA(2b,bad_put_user)
> + _ASM_EXTABLE_UA(3b,bad_put_user)
> + _ASM_EXTABLE_UA(4b,bad_put_user)
> #ifdef CONFIG_X86_32
> - _ASM_EXTABLE(5b,bad_put_user)
> + _ASM_EXTABLE_UA(5b,bad_put_user)
> #endif
> diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
> index 7add8ba06887..92eb5956f2f3 100644
> --- a/arch/x86/lib/usercopy_32.c
> +++ b/arch/x86/lib/usercopy_32.c
> @@ -47,8 +47,8 @@ do { \
> "3: lea 0(%2,%0,4),%0\n" \
> " jmp 2b\n" \
> ".previous\n" \
> - _ASM_EXTABLE(0b,3b) \
> - _ASM_EXTABLE(1b,2b) \
> + _ASM_EXTABLE_UA(0b,3b) \
> + _ASM_EXTABLE_UA(1b,2b) \
> : "=&c"(size), "=&D" (__d0) \
> : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \
> } while (0)
> @@ -153,44 +153,44 @@ __copy_user_intel(void __user *to, const void *from, unsigned long size)
> "101: lea 0(%%eax,%0,4),%0\n"
> " jmp 100b\n"
> ".previous\n"
> - _ASM_EXTABLE(1b,100b)
> - _ASM_EXTABLE(2b,100b)
> - _ASM_EXTABLE(3b,100b)
> - _ASM_EXTABLE(4b,100b)
> - _ASM_EXTABLE(5b,100b)
> - _ASM_EXTABLE(6b,100b)
> - _ASM_EXTABLE(7b,100b)
> - _ASM_EXTABLE(8b,100b)
> - _ASM_EXTABLE(9b,100b)
> - _ASM_EXTABLE(10b,100b)
> - _ASM_EXTABLE(11b,100b)
> - _ASM_EXTABLE(12b,100b)
> - _ASM_EXTABLE(13b,100b)
> - _ASM_EXTABLE(14b,100b)
> - _ASM_EXTABLE(15b,100b)
> - _ASM_EXTABLE(16b,100b)
> - _ASM_EXTABLE(17b,100b)
> - _ASM_EXTABLE(18b,100b)
> - _ASM_EXTABLE(19b,100b)
> - _ASM_EXTABLE(20b,100b)
> - _ASM_EXTABLE(21b,100b)
> - _ASM_EXTABLE(22b,100b)
> - _ASM_EXTABLE(23b,100b)
> - _ASM_EXTABLE(24b,100b)
> - _ASM_EXTABLE(25b,100b)
> - _ASM_EXTABLE(26b,100b)
> - _ASM_EXTABLE(27b,100b)
> - _ASM_EXTABLE(28b,100b)
> - _ASM_EXTABLE(29b,100b)
> - _ASM_EXTABLE(30b,100b)
> - _ASM_EXTABLE(31b,100b)
> - _ASM_EXTABLE(32b,100b)
> - _ASM_EXTABLE(33b,100b)
> - _ASM_EXTABLE(34b,100b)
> - _ASM_EXTABLE(35b,100b)
> - _ASM_EXTABLE(36b,100b)
> - _ASM_EXTABLE(37b,100b)
> - _ASM_EXTABLE(99b,101b)
> + _ASM_EXTABLE_UA(1b,100b)
> + _ASM_EXTABLE_UA(2b,100b)
> + _ASM_EXTABLE_UA(3b,100b)
> + _ASM_EXTABLE_UA(4b,100b)
> + _ASM_EXTABLE_UA(5b,100b)
> + _ASM_EXTABLE_UA(6b,100b)
> + _ASM_EXTABLE_UA(7b,100b)
> + _ASM_EXTABLE_UA(8b,100b)
> + _ASM_EXTABLE_UA(9b,100b)
> + _ASM_EXTABLE_UA(10b,100b)
> + _ASM_EXTABLE_UA(11b,100b)
> + _ASM_EXTABLE_UA(12b,100b)
> + _ASM_EXTABLE_UA(13b,100b)
> + _ASM_EXTABLE_UA(14b,100b)
> + _ASM_EXTABLE_UA(15b,100b)
> + _ASM_EXTABLE_UA(16b,100b)
> + _ASM_EXTABLE_UA(17b,100b)
> + _ASM_EXTABLE_UA(18b,100b)
> + _ASM_EXTABLE_UA(19b,100b)
> + _ASM_EXTABLE_UA(20b,100b)
> + _ASM_EXTABLE_UA(21b,100b)
> + _ASM_EXTABLE_UA(22b,100b)
> + _ASM_EXTABLE_UA(23b,100b)
> + _ASM_EXTABLE_UA(24b,100b)
> + _ASM_EXTABLE_UA(25b,100b)
> + _ASM_EXTABLE_UA(26b,100b)
> + _ASM_EXTABLE_UA(27b,100b)
> + _ASM_EXTABLE_UA(28b,100b)
> + _ASM_EXTABLE_UA(29b,100b)
> + _ASM_EXTABLE_UA(30b,100b)
> + _ASM_EXTABLE_UA(31b,100b)
> + _ASM_EXTABLE_UA(32b,100b)
> + _ASM_EXTABLE_UA(33b,100b)
> + _ASM_EXTABLE_UA(34b,100b)
> + _ASM_EXTABLE_UA(35b,100b)
> + _ASM_EXTABLE_UA(36b,100b)
> + _ASM_EXTABLE_UA(37b,100b)
> + _ASM_EXTABLE_UA(99b,101b)
> : "=&c"(size), "=&D" (d0), "=&S" (d1)
> : "1"(to), "2"(from), "0"(size)
> : "eax", "edx", "memory");
> @@ -259,26 +259,26 @@ static unsigned long __copy_user_intel_nocache(void *to,
> "9: lea 0(%%eax,%0,4),%0\n"
> "16: jmp 8b\n"
> ".previous\n"
> - _ASM_EXTABLE(0b,16b)
> - _ASM_EXTABLE(1b,16b)
> - _ASM_EXTABLE(2b,16b)
> - _ASM_EXTABLE(21b,16b)
> - _ASM_EXTABLE(3b,16b)
> - _ASM_EXTABLE(31b,16b)
> - _ASM_EXTABLE(4b,16b)
> - _ASM_EXTABLE(41b,16b)
> - _ASM_EXTABLE(10b,16b)
> - _ASM_EXTABLE(51b,16b)
> - _ASM_EXTABLE(11b,16b)
> - _ASM_EXTABLE(61b,16b)
> - _ASM_EXTABLE(12b,16b)
> - _ASM_EXTABLE(71b,16b)
> - _ASM_EXTABLE(13b,16b)
> - _ASM_EXTABLE(81b,16b)
> - _ASM_EXTABLE(14b,16b)
> - _ASM_EXTABLE(91b,16b)
> - _ASM_EXTABLE(6b,9b)
> - _ASM_EXTABLE(7b,16b)
> + _ASM_EXTABLE_UA(0b,16b)
> + _ASM_EXTABLE_UA(1b,16b)
> + _ASM_EXTABLE_UA(2b,16b)
> + _ASM_EXTABLE_UA(21b,16b)
> + _ASM_EXTABLE_UA(3b,16b)
> + _ASM_EXTABLE_UA(31b,16b)
> + _ASM_EXTABLE_UA(4b,16b)
> + _ASM_EXTABLE_UA(41b,16b)
> + _ASM_EXTABLE_UA(10b,16b)
> + _ASM_EXTABLE_UA(51b,16b)
> + _ASM_EXTABLE_UA(11b,16b)
> + _ASM_EXTABLE_UA(61b,16b)
> + _ASM_EXTABLE_UA(12b,16b)
> + _ASM_EXTABLE_UA(71b,16b)
> + _ASM_EXTABLE_UA(13b,16b)
> + _ASM_EXTABLE_UA(81b,16b)
> + _ASM_EXTABLE_UA(14b,16b)
> + _ASM_EXTABLE_UA(91b,16b)
> + _ASM_EXTABLE_UA(6b,9b)
> + _ASM_EXTABLE_UA(7b,16b)
> : "=&c"(size), "=&D" (d0), "=&S" (d1)
> : "1"(to), "2"(from), "0"(size)
> : "eax", "edx", "memory");
> @@ -321,9 +321,9 @@ do { \
> "3: lea 0(%3,%0,4),%0\n" \
> " jmp 2b\n" \
> ".previous\n" \
> - _ASM_EXTABLE(4b,5b) \
> - _ASM_EXTABLE(0b,3b) \
> - _ASM_EXTABLE(1b,2b) \
> + _ASM_EXTABLE_UA(4b,5b) \
> + _ASM_EXTABLE_UA(0b,3b) \
> + _ASM_EXTABLE_UA(1b,2b) \
> : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \
> : "3"(size), "0"(size), "1"(to), "2"(from) \
> : "memory"); \
> diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
> index 9c5606d88f61..c55b1f590cc4 100644
> --- a/arch/x86/lib/usercopy_64.c
> +++ b/arch/x86/lib/usercopy_64.c
> @@ -37,8 +37,8 @@ unsigned long __clear_user(void __user *addr, unsigned long size)
> "3: lea 0(%[size1],%[size8],8),%[size8]\n"
> " jmp 2b\n"
> ".previous\n"
> - _ASM_EXTABLE(0b,3b)
> - _ASM_EXTABLE(1b,2b)
> + _ASM_EXTABLE_UA(0b,3b)
> + _ASM_EXTABLE_UA(1b,2b)
> : [size8] "=&c"(size), [dst] "=&D" (__d0)
> : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr));
> clac();
> diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
> index 45f5d6cf65ae..3fd55a03892d 100644
> --- a/arch/x86/mm/extable.c
> +++ b/arch/x86/mm/extable.c
> @@ -8,7 +8,7 @@
> #include <asm/kdebug.h>
>
> typedef bool (*ex_handler_t)(const struct exception_table_entry *,
> - struct pt_regs *, int);
> + struct pt_regs *, int, unsigned long);
>
> static inline unsigned long
> ex_fixup_addr(const struct exception_table_entry *x)
> @@ -22,7 +22,8 @@ ex_fixup_handler(const struct exception_table_entry *x)
> }
>
> __visible bool ex_handler_default(const struct exception_table_entry *fixup,
> - struct pt_regs *regs, int trapnr)
> + struct pt_regs *regs, int trapnr,
> + unsigned long fault_addr)
> {
> regs->ip = ex_fixup_addr(fixup);
> return true;
> @@ -30,7 +31,8 @@ __visible bool ex_handler_default(const struct exception_table_entry *fixup,
> EXPORT_SYMBOL(ex_handler_default);
>
> __visible bool ex_handler_fault(const struct exception_table_entry *fixup,
> - struct pt_regs *regs, int trapnr)
> + struct pt_regs *regs, int trapnr,
> + unsigned long fault_addr)
> {
> regs->ip = ex_fixup_addr(fixup);
> regs->ax = trapnr;
> @@ -43,7 +45,8 @@ EXPORT_SYMBOL_GPL(ex_handler_fault);
> * result of a refcount inc/dec/add/sub.
> */
> __visible bool ex_handler_refcount(const struct exception_table_entry *fixup,
> - struct pt_regs *regs, int trapnr)
> + struct pt_regs *regs, int trapnr,
> + unsigned long fault_addr)
> {
> /* First unconditionally saturate the refcount. */
> *(int *)regs->cx = INT_MIN / 2;
> @@ -96,7 +99,8 @@ EXPORT_SYMBOL(ex_handler_refcount);
> * out all the FPU registers) if we can't restore from the task's FPU state.
> */
> __visible bool ex_handler_fprestore(const struct exception_table_entry *fixup,
> - struct pt_regs *regs, int trapnr)
> + struct pt_regs *regs, int trapnr,
> + unsigned long fault_addr)
> {
> regs->ip = ex_fixup_addr(fixup);
>
> @@ -108,18 +112,53 @@ __visible bool ex_handler_fprestore(const struct exception_table_entry *fixup,
> }
> EXPORT_SYMBOL_GPL(ex_handler_fprestore);
>
> +static void bogus_uaccess_check(struct pt_regs *regs, int trapnr,
> + unsigned long fault_addr)
> +{
> + if (trapnr != X86_TRAP_PF || fault_addr < TASK_SIZE_MAX)
> + return;
> + /*
> + * This is a pagefault in kernel space, on a kernel address, in a
> + * usercopy function. This can e.g. be caused by improper use of helpers
> + * like __put_user and by improper attempts to access userspace
> + * addresses in KERNEL_DS regions. The one legitimate exception are
> + * probe_kernel_{read,write}(), which can be invoked from places like
> + * kgdb, /dev/mem (for reading) and privileged BPF code (for reading).
> + * The probe_kernel_*() functions set the kernel_uaccess_faults_ok flag
> + * to tell us that faulting on kernel addresses in a userspace accessor
> + * is fine.
> + */
> + if (current->kernel_uaccess_faults_ok)
> + return;
> + WARN(1, "pagefault on kernel address 0x%lx in non-whitelisted uaccess",
> + fault_addr);
> +}
> +
> +__visible bool ex_handler_uaccess(const struct exception_table_entry *fixup,
> + struct pt_regs *regs, int trapnr,
> + unsigned long fault_addr)
> +{
> + bogus_uaccess_check(regs, trapnr, fault_addr);
> + regs->ip = ex_fixup_addr(fixup);
> + return true;
> +}
> +EXPORT_SYMBOL(ex_handler_uaccess);
> +
> __visible bool ex_handler_ext(const struct exception_table_entry *fixup,
> - struct pt_regs *regs, int trapnr)
> + struct pt_regs *regs, int trapnr,
> + unsigned long fault_addr)
> {
> /* Special hack for uaccess_err */
> current->thread.uaccess_err = 1;
> + bogus_uaccess_check(regs, trapnr, fault_addr);
> regs->ip = ex_fixup_addr(fixup);
> return true;
> }
> EXPORT_SYMBOL(ex_handler_ext);
>
> __visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
> - struct pt_regs *regs, int trapnr)
> + struct pt_regs *regs, int trapnr,
> + unsigned long fault_addr)
> {
> if (pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pF)\n",
> (unsigned int)regs->cx, regs->ip, (void *)regs->ip))
> @@ -134,7 +173,8 @@ __visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup
> EXPORT_SYMBOL(ex_handler_rdmsr_unsafe);
>
> __visible bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup,
> - struct pt_regs *regs, int trapnr)
> + struct pt_regs *regs, int trapnr,
> + unsigned long fault_addr)
> {
> if (pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pF)\n",
> (unsigned int)regs->cx, (unsigned int)regs->dx,
> @@ -148,12 +188,13 @@ __visible bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup
> EXPORT_SYMBOL(ex_handler_wrmsr_unsafe);
>
> __visible bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
> - struct pt_regs *regs, int trapnr)
> + struct pt_regs *regs, int trapnr,
> + unsigned long fault_addr)
> {
> if (static_cpu_has(X86_BUG_NULL_SEG))
> asm volatile ("mov %0, %%fs" : : "rm" (__USER_DS));
> asm volatile ("mov %0, %%fs" : : "rm" (0));
> - return ex_handler_default(fixup, regs, trapnr);
> + return ex_handler_default(fixup, regs, trapnr, fault_addr);
> }
> EXPORT_SYMBOL(ex_handler_clear_fs);
>
> @@ -170,7 +211,7 @@ __visible bool ex_has_fault_handler(unsigned long ip)
> return handler == ex_handler_fault;
> }
>
> -int fixup_exception(struct pt_regs *regs, int trapnr)
> +int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long fault_addr)
> {
> const struct exception_table_entry *e;
> ex_handler_t handler;
> @@ -194,7 +235,7 @@ int fixup_exception(struct pt_regs *regs, int trapnr)
> return 0;
>
> handler = ex_fixup_handler(e);
> - return handler(e, regs, trapnr);
> + return handler(e, regs, trapnr, fault_addr);
> }
>
> extern unsigned int early_recursion_flag;
> @@ -232,7 +273,7 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr)
> * Keep in mind that not all vectors actually get here. Early
> * fage faults, for example, are special.
> */
> - if (fixup_exception(regs, trapnr))
> + if (fixup_exception(regs, trapnr, 0))
> return;
>
> if (fixup_bug(regs, trapnr))
> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
> index 2aafa6ab6103..96e3e5cf2cfc 100644
> --- a/arch/x86/mm/fault.c
> +++ b/arch/x86/mm/fault.c
> @@ -710,7 +710,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
> int sig;
>
> /* Are we prepared to handle this kernel fault? */
> - if (fixup_exception(regs, X86_TRAP_PF)) {
> + if (fixup_exception(regs, X86_TRAP_PF, address)) {
> /*
> * Any interrupt that takes a fault gets the fixup. This makes
> * the below recursive fault logic only apply to a faults from
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 43731fe51c97..b50598761ed4 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -734,6 +734,8 @@ struct task_struct {
> /* disallow userland-initiated cgroup migration */
> unsigned no_cgroup_migration:1;
> #endif
> + /* usercopy functions may fault on kernel addresses */
> + unsigned int kernel_uaccess_faults_ok:1;
>
> unsigned long atomic_flags; /* Flags requiring atomic access. */
>
> diff --git a/mm/maccess.c b/mm/maccess.c
> index ec00be51a24f..e066aa8482af 100644
> --- a/mm/maccess.c
> +++ b/mm/maccess.c
> @@ -30,8 +30,10 @@ long __probe_kernel_read(void *dst, const void *src, size_t size)
>
> set_fs(KERNEL_DS);
> pagefault_disable();
> + current->kernel_uaccess_faults_ok = 1;
> ret = __copy_from_user_inatomic(dst,
> (__force const void __user *)src, size);
> + current->kernel_uaccess_faults_ok = 0;
> pagefault_enable();
> set_fs(old_fs);
>
> @@ -58,7 +60,9 @@ long __probe_kernel_write(void *dst, const void *src, size_t size)
>
> set_fs(KERNEL_DS);
> pagefault_disable();
> + current->kernel_uaccess_faults_ok = 1;
> ret = __copy_to_user_inatomic((__force void __user *)dst, src, size);
> + current->kernel_uaccess_faults_ok = 0;
> pagefault_enable();
> set_fs(old_fs);
>
> @@ -94,11 +98,13 @@ long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count)
>
> set_fs(KERNEL_DS);
> pagefault_disable();
> + current->kernel_uaccess_faults_ok = 1;
>
> do {
> ret = __get_user(*dst++, (const char __user __force *)src++);
> } while (dst[-1] && ret == 0 && src - unsafe_addr < count);
>
> + current->kernel_uaccess_faults_ok = 0;
> dst[-1] = '\0';
> pagefault_enable();
> set_fs(old_fs);
> --
> 2.18.0.597.ga71716f1ad-goog
>
* Andy Lutomirski <[email protected]> wrote:
> > On Aug 6, 2018, at 6:22 PM, Jann Horn <[email protected]> wrote:
> >
> > There have been multiple kernel vulnerabilities that permitted userspace to
> > pass completely unchecked pointers through to userspace accessors:
> >
> > - the waitid() bug - commit 96ca579a1ecc ("waitid(): Add missing
> > access_ok() checks")
> > - the sg/bsg read/write APIs
> > - the infiniband read/write APIs
> >
> > These don't happen all that often, but when they do happen, it is hard to
> > test for them properly; and it is probably also hard to discover them with
> > fuzzing. Even when an unmapped kernel address is supplied to such buggy
> > code, it just returns -EFAULT instead of doing a proper BUG() or at least
> > WARN().
> >
> > This patch attempts to make such misbehaving code a bit more visible by
> > WARN()ing in the pagefault handler code when a userspace accessor causes
> > #PF on a kernel address and the current context isn't whitelisted.
>
> I like this a lot, and, in fact, I once wrote a patch to do something similar. It was before
> the fancy extable code, though, so it was a mess. Here are some thoughts:
Agreed - please move this series beyond the RFC phase.
> - It should be three patches. One patch to add the _UA annotations, one to improve the info
> passes to the handlers, and one to change behavior.
>
> - You should pass the vector, the error code, and the address to the handler.
>
> - The uaccess handler should IMO WARN if the vector is anything other than #PF (which mainly
> means warning if it’s #GP). I think it should pr_emerg() and return false if the vector is
> #PF and the address is too high.
>
> - Arguably most non-uaccess fixups should at least warn for anything other than #GP and #UD.
Ack.
Thanks,
Ingo
On Tue, Aug 7, 2018 at 3:22 AM, Jann Horn <[email protected]> wrote:
> There have been multiple kernel vulnerabilities that permitted userspace to
> pass completely unchecked pointers through to userspace accessors:
>
> - the waitid() bug - commit 96ca579a1ecc ("waitid(): Add missing
> access_ok() checks")
> - the sg/bsg read/write APIs
> - the infiniband read/write APIs
>
> These don't happen all that often, but when they do happen, it is hard to
> test for them properly; and it is probably also hard to discover them with
> fuzzing. Even when an unmapped kernel address is supplied to such buggy
> code, it just returns -EFAULT instead of doing a proper BUG() or at least
> WARN().
>
> This patch attempts to make such misbehaving code a bit more visible by
> WARN()ing in the pagefault handler code when a userspace accessor causes
> #PF on a kernel address and the current context isn't whitelisted.
This is not triggerable unless there is a kernel bug, right? I mean
this won't be a DoS vector? And any case is something to report to
kernel developers?
> Signed-off-by: Jann Horn <[email protected]>
> ---
> This is a hacky, quickly thrown-together PoC to see what people think
> about the basic idea. Does it look like a sensible change? Or would it
> be better, for example, to instead expand the KASan hooks in the
> usercopy logic to also look at the "user" pointer if it points to
> kernelspace? That would have the advantage of also catching heap
> overflows properly...
>
> I'm not really happy with all the plumbing in my patch; I wonder whether
> there's a way around introducing _ASM_EXTABLE_UA() for user accessors?
>
> arch/x86/include/asm/asm.h | 10 ++-
> arch/x86/include/asm/extable.h | 3 +-
> arch/x86/include/asm/fpu/internal.h | 6 +-
> arch/x86/include/asm/futex.h | 6 +-
> arch/x86/include/asm/uaccess.h | 22 ++---
> arch/x86/kernel/cpu/mcheck/mce.c | 2 +-
> arch/x86/kernel/kprobes/core.c | 2 +-
> arch/x86/kernel/traps.c | 6 +-
> arch/x86/lib/checksum_32.S | 4 +-
> arch/x86/lib/copy_user_64.S | 90 ++++++++++----------
> arch/x86/lib/csum-copy_64.S | 6 +-
> arch/x86/lib/getuser.S | 12 +--
> arch/x86/lib/putuser.S | 10 +--
> arch/x86/lib/usercopy_32.c | 126 ++++++++++++++--------------
> arch/x86/lib/usercopy_64.c | 4 +-
> arch/x86/mm/extable.c | 67 ++++++++++++---
> arch/x86/mm/fault.c | 2 +-
> include/linux/sched.h | 2 +
> mm/maccess.c | 6 ++
> 19 files changed, 221 insertions(+), 165 deletions(-)
>
> diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
> index 990770f9e76b..38f44a773adf 100644
> --- a/arch/x86/include/asm/asm.h
> +++ b/arch/x86/include/asm/asm.h
> @@ -130,6 +130,9 @@
> # define _ASM_EXTABLE(from, to) \
> _ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
>
> +# define _ASM_EXTABLE_UA(from, to) \
> + _ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
> +
> # define _ASM_EXTABLE_FAULT(from, to) \
> _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
>
> @@ -165,8 +168,8 @@
> jmp copy_user_handle_tail
> .previous
>
> - _ASM_EXTABLE(100b,103b)
> - _ASM_EXTABLE(101b,103b)
> + _ASM_EXTABLE_UA(100b,103b)
> + _ASM_EXTABLE_UA(101b,103b)
> .endm
>
> #else
> @@ -182,6 +185,9 @@
> # define _ASM_EXTABLE(from, to) \
> _ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
>
> +# define _ASM_EXTABLE_UA(from, to) \
> + _ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
> +
> # define _ASM_EXTABLE_FAULT(from, to) \
> _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
>
> diff --git a/arch/x86/include/asm/extable.h b/arch/x86/include/asm/extable.h
> index f9c3a5d502f4..93c1d28f3c73 100644
> --- a/arch/x86/include/asm/extable.h
> +++ b/arch/x86/include/asm/extable.h
> @@ -29,7 +29,8 @@ struct pt_regs;
> (b)->handler = (tmp).handler - (delta); \
> } while (0)
>
> -extern int fixup_exception(struct pt_regs *regs, int trapnr);
> +extern int fixup_exception(struct pt_regs *regs, int trapnr,
> + unsigned long pf_addr);
> extern int fixup_bug(struct pt_regs *regs, int trapnr);
> extern bool ex_has_fault_handler(unsigned long ip);
> extern void early_fixup_exception(struct pt_regs *regs, int trapnr);
> diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
> index a38bf5a1e37a..640e7721138c 100644
> --- a/arch/x86/include/asm/fpu/internal.h
> +++ b/arch/x86/include/asm/fpu/internal.h
> @@ -113,7 +113,7 @@ extern void fpstate_sanitize_xstate(struct fpu *fpu);
> "3: movl $-1,%[err]\n" \
> " jmp 2b\n" \
> ".previous\n" \
> - _ASM_EXTABLE(1b, 3b) \
> + _ASM_EXTABLE_UA(1b, 3b) \
> : [err] "=r" (err), output \
> : "0"(0), input); \
> err; \
> @@ -226,7 +226,7 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
> "3: movl $-2,%[err]\n\t" \
> "jmp 2b\n\t" \
> ".popsection\n\t" \
> - _ASM_EXTABLE(1b, 3b) \
> + _ASM_EXTABLE_UA(1b, 3b) \
> : [err] "=r" (err) \
> : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
> : "memory")
> @@ -256,7 +256,7 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
> "4: movl $-2, %[err]\n" \
> "jmp 3b\n" \
> ".popsection\n" \
> - _ASM_EXTABLE(661b, 4b) \
> + _ASM_EXTABLE_UA(661b, 4b) \
> : [err] "=r" (err) \
> : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
> : "memory")
> diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h
> index de4d68852d3a..13c83fe97988 100644
> --- a/arch/x86/include/asm/futex.h
> +++ b/arch/x86/include/asm/futex.h
> @@ -20,7 +20,7 @@
> "3:\tmov\t%3, %1\n" \
> "\tjmp\t2b\n" \
> "\t.previous\n" \
> - _ASM_EXTABLE(1b, 3b) \
> + _ASM_EXTABLE_UA(1b, 3b) \
> : "=r" (oldval), "=r" (ret), "+m" (*uaddr) \
> : "i" (-EFAULT), "0" (oparg), "1" (0))
>
> @@ -36,8 +36,8 @@
> "4:\tmov\t%5, %1\n" \
> "\tjmp\t3b\n" \
> "\t.previous\n" \
> - _ASM_EXTABLE(1b, 4b) \
> - _ASM_EXTABLE(2b, 4b) \
> + _ASM_EXTABLE_UA(1b, 4b) \
> + _ASM_EXTABLE_UA(2b, 4b) \
> : "=&a" (oldval), "=&r" (ret), \
> "+m" (*uaddr), "=&r" (tem) \
> : "r" (oparg), "i" (-EFAULT), "1" (0))
> diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
> index aae77eb8491c..b5e58cc0c5e7 100644
> --- a/arch/x86/include/asm/uaccess.h
> +++ b/arch/x86/include/asm/uaccess.h
> @@ -198,8 +198,8 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
> "4: movl %3,%0\n" \
> " jmp 3b\n" \
> ".previous\n" \
> - _ASM_EXTABLE(1b, 4b) \
> - _ASM_EXTABLE(2b, 4b) \
> + _ASM_EXTABLE_UA(1b, 4b) \
> + _ASM_EXTABLE_UA(2b, 4b) \
> : "=r" (err) \
> : "A" (x), "r" (addr), "i" (errret), "0" (err))
>
> @@ -340,8 +340,8 @@ do { \
> " xorl %%edx,%%edx\n" \
> " jmp 3b\n" \
> ".previous\n" \
> - _ASM_EXTABLE(1b, 4b) \
> - _ASM_EXTABLE(2b, 4b) \
> + _ASM_EXTABLE_UA(1b, 4b) \
> + _ASM_EXTABLE_UA(2b, 4b) \
> : "=r" (retval), "=&A"(x) \
> : "m" (__m(__ptr)), "m" __m(((u32 __user *)(__ptr)) + 1), \
> "i" (errret), "0" (retval)); \
> @@ -386,7 +386,7 @@ do { \
> " xor"itype" %"rtype"1,%"rtype"1\n" \
> " jmp 2b\n" \
> ".previous\n" \
> - _ASM_EXTABLE(1b, 3b) \
> + _ASM_EXTABLE_UA(1b, 3b) \
> : "=r" (err), ltype(x) \
> : "m" (__m(addr)), "i" (errret), "0" (err))
>
> @@ -398,7 +398,7 @@ do { \
> "3: mov %3,%0\n" \
> " jmp 2b\n" \
> ".previous\n" \
> - _ASM_EXTABLE(1b, 3b) \
> + _ASM_EXTABLE_UA(1b, 3b) \
> : "=r" (err), ltype(x) \
> : "m" (__m(addr)), "i" (errret), "0" (err))
>
> @@ -474,7 +474,7 @@ struct __large_struct { unsigned long buf[100]; };
> "3: mov %3,%0\n" \
> " jmp 2b\n" \
> ".previous\n" \
> - _ASM_EXTABLE(1b, 3b) \
> + _ASM_EXTABLE_UA(1b, 3b) \
> : "=r"(err) \
> : ltype(x), "m" (__m(addr)), "i" (errret), "0" (err))
>
> @@ -602,7 +602,7 @@ extern void __cmpxchg_wrong_size(void)
> "3:\tmov %3, %0\n" \
> "\tjmp 2b\n" \
> "\t.previous\n" \
> - _ASM_EXTABLE(1b, 3b) \
> + _ASM_EXTABLE_UA(1b, 3b) \
> : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \
> : "i" (-EFAULT), "q" (__new), "1" (__old) \
> : "memory" \
> @@ -618,7 +618,7 @@ extern void __cmpxchg_wrong_size(void)
> "3:\tmov %3, %0\n" \
> "\tjmp 2b\n" \
> "\t.previous\n" \
> - _ASM_EXTABLE(1b, 3b) \
> + _ASM_EXTABLE_UA(1b, 3b) \
> : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \
> : "i" (-EFAULT), "r" (__new), "1" (__old) \
> : "memory" \
> @@ -634,7 +634,7 @@ extern void __cmpxchg_wrong_size(void)
> "3:\tmov %3, %0\n" \
> "\tjmp 2b\n" \
> "\t.previous\n" \
> - _ASM_EXTABLE(1b, 3b) \
> + _ASM_EXTABLE_UA(1b, 3b) \
> : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \
> : "i" (-EFAULT), "r" (__new), "1" (__old) \
> : "memory" \
> @@ -653,7 +653,7 @@ extern void __cmpxchg_wrong_size(void)
> "3:\tmov %3, %0\n" \
> "\tjmp 2b\n" \
> "\t.previous\n" \
> - _ASM_EXTABLE(1b, 3b) \
> + _ASM_EXTABLE_UA(1b, 3b) \
> : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \
> : "i" (-EFAULT), "r" (__new), "1" (__old) \
> : "memory" \
> diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
> index 8c50754c09c1..e038e3f31e08 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce.c
> @@ -1335,7 +1335,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
> local_irq_disable();
> ist_end_non_atomic();
> } else {
> - if (!fixup_exception(regs, X86_TRAP_MC))
> + if (!fixup_exception(regs, X86_TRAP_MC, 0))
> mce_panic("Failed kernel mode recovery", &m, NULL);
> }
>
> diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
> index 6f4d42377fe5..5ad2639d3b8a 100644
> --- a/arch/x86/kernel/kprobes/core.c
> +++ b/arch/x86/kernel/kprobes/core.c
> @@ -1044,7 +1044,7 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
> * In case the user-specified fault handler returned
> * zero, try to fix up.
> */
> - if (fixup_exception(regs, trapnr))
> + if (fixup_exception(regs, trapnr, 0))
> return 1;
>
> /*
> diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
> index e6db475164ed..0cd11b46072a 100644
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -206,7 +206,7 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
> }
>
> if (!user_mode(regs)) {
> - if (fixup_exception(regs, trapnr))
> + if (fixup_exception(regs, trapnr, 0))
> return 0;
>
> tsk->thread.error_code = error_code;
> @@ -551,7 +551,7 @@ do_general_protection(struct pt_regs *regs, long error_code)
>
> tsk = current;
> if (!user_mode(regs)) {
> - if (fixup_exception(regs, X86_TRAP_GP))
> + if (fixup_exception(regs, X86_TRAP_GP, 0))
> return;
>
> tsk->thread.error_code = error_code;
> @@ -838,7 +838,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
> cond_local_irq_enable(regs);
>
> if (!user_mode(regs)) {
> - if (fixup_exception(regs, trapnr))
> + if (fixup_exception(regs, trapnr, 0))
> return;
>
> task->thread.error_code = error_code;
> diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
> index 46e71a74e612..ad8e0906d1ea 100644
> --- a/arch/x86/lib/checksum_32.S
> +++ b/arch/x86/lib/checksum_32.S
> @@ -273,11 +273,11 @@ unsigned int csum_partial_copy_generic (const char *src, char *dst,
>
> #define SRC(y...) \
> 9999: y; \
> - _ASM_EXTABLE(9999b, 6001f)
> + _ASM_EXTABLE_UA(9999b, 6001f)
>
> #define DST(y...) \
> 9999: y; \
> - _ASM_EXTABLE(9999b, 6002f)
> + _ASM_EXTABLE_UA(9999b, 6002f)
>
> #ifndef CONFIG_X86_USE_PPRO_CHECKSUM
>
> diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
> index 020f75cc8cf6..80cfad666210 100644
> --- a/arch/x86/lib/copy_user_64.S
> +++ b/arch/x86/lib/copy_user_64.S
> @@ -92,26 +92,26 @@ ENTRY(copy_user_generic_unrolled)
> 60: jmp copy_user_handle_tail /* ecx is zerorest also */
> .previous
>
> - _ASM_EXTABLE(1b,30b)
> - _ASM_EXTABLE(2b,30b)
> - _ASM_EXTABLE(3b,30b)
> - _ASM_EXTABLE(4b,30b)
> - _ASM_EXTABLE(5b,30b)
> - _ASM_EXTABLE(6b,30b)
> - _ASM_EXTABLE(7b,30b)
> - _ASM_EXTABLE(8b,30b)
> - _ASM_EXTABLE(9b,30b)
> - _ASM_EXTABLE(10b,30b)
> - _ASM_EXTABLE(11b,30b)
> - _ASM_EXTABLE(12b,30b)
> - _ASM_EXTABLE(13b,30b)
> - _ASM_EXTABLE(14b,30b)
> - _ASM_EXTABLE(15b,30b)
> - _ASM_EXTABLE(16b,30b)
> - _ASM_EXTABLE(18b,40b)
> - _ASM_EXTABLE(19b,40b)
> - _ASM_EXTABLE(21b,50b)
> - _ASM_EXTABLE(22b,50b)
> + _ASM_EXTABLE_UA(1b,30b)
> + _ASM_EXTABLE_UA(2b,30b)
> + _ASM_EXTABLE_UA(3b,30b)
> + _ASM_EXTABLE_UA(4b,30b)
> + _ASM_EXTABLE_UA(5b,30b)
> + _ASM_EXTABLE_UA(6b,30b)
> + _ASM_EXTABLE_UA(7b,30b)
> + _ASM_EXTABLE_UA(8b,30b)
> + _ASM_EXTABLE_UA(9b,30b)
> + _ASM_EXTABLE_UA(10b,30b)
> + _ASM_EXTABLE_UA(11b,30b)
> + _ASM_EXTABLE_UA(12b,30b)
> + _ASM_EXTABLE_UA(13b,30b)
> + _ASM_EXTABLE_UA(14b,30b)
> + _ASM_EXTABLE_UA(15b,30b)
> + _ASM_EXTABLE_UA(16b,30b)
> + _ASM_EXTABLE_UA(18b,40b)
> + _ASM_EXTABLE_UA(19b,40b)
> + _ASM_EXTABLE_UA(21b,50b)
> + _ASM_EXTABLE_UA(22b,50b)
> ENDPROC(copy_user_generic_unrolled)
> EXPORT_SYMBOL(copy_user_generic_unrolled)
>
> @@ -156,8 +156,8 @@ ENTRY(copy_user_generic_string)
> jmp copy_user_handle_tail
> .previous
>
> - _ASM_EXTABLE(1b,11b)
> - _ASM_EXTABLE(3b,12b)
> + _ASM_EXTABLE_UA(1b,11b)
> + _ASM_EXTABLE_UA(3b,12b)
> ENDPROC(copy_user_generic_string)
> EXPORT_SYMBOL(copy_user_generic_string)
>
> @@ -189,7 +189,7 @@ ENTRY(copy_user_enhanced_fast_string)
> jmp copy_user_handle_tail
> .previous
>
> - _ASM_EXTABLE(1b,12b)
> + _ASM_EXTABLE_UA(1b,12b)
> ENDPROC(copy_user_enhanced_fast_string)
> EXPORT_SYMBOL(copy_user_enhanced_fast_string)
>
> @@ -319,27 +319,27 @@ ENTRY(__copy_user_nocache)
> jmp copy_user_handle_tail
> .previous
>
> - _ASM_EXTABLE(1b,.L_fixup_4x8b_copy)
> - _ASM_EXTABLE(2b,.L_fixup_4x8b_copy)
> - _ASM_EXTABLE(3b,.L_fixup_4x8b_copy)
> - _ASM_EXTABLE(4b,.L_fixup_4x8b_copy)
> - _ASM_EXTABLE(5b,.L_fixup_4x8b_copy)
> - _ASM_EXTABLE(6b,.L_fixup_4x8b_copy)
> - _ASM_EXTABLE(7b,.L_fixup_4x8b_copy)
> - _ASM_EXTABLE(8b,.L_fixup_4x8b_copy)
> - _ASM_EXTABLE(9b,.L_fixup_4x8b_copy)
> - _ASM_EXTABLE(10b,.L_fixup_4x8b_copy)
> - _ASM_EXTABLE(11b,.L_fixup_4x8b_copy)
> - _ASM_EXTABLE(12b,.L_fixup_4x8b_copy)
> - _ASM_EXTABLE(13b,.L_fixup_4x8b_copy)
> - _ASM_EXTABLE(14b,.L_fixup_4x8b_copy)
> - _ASM_EXTABLE(15b,.L_fixup_4x8b_copy)
> - _ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
> - _ASM_EXTABLE(20b,.L_fixup_8b_copy)
> - _ASM_EXTABLE(21b,.L_fixup_8b_copy)
> - _ASM_EXTABLE(30b,.L_fixup_4b_copy)
> - _ASM_EXTABLE(31b,.L_fixup_4b_copy)
> - _ASM_EXTABLE(40b,.L_fixup_1b_copy)
> - _ASM_EXTABLE(41b,.L_fixup_1b_copy)
> + _ASM_EXTABLE_UA(1b,.L_fixup_4x8b_copy)
> + _ASM_EXTABLE_UA(2b,.L_fixup_4x8b_copy)
> + _ASM_EXTABLE_UA(3b,.L_fixup_4x8b_copy)
> + _ASM_EXTABLE_UA(4b,.L_fixup_4x8b_copy)
> + _ASM_EXTABLE_UA(5b,.L_fixup_4x8b_copy)
> + _ASM_EXTABLE_UA(6b,.L_fixup_4x8b_copy)
> + _ASM_EXTABLE_UA(7b,.L_fixup_4x8b_copy)
> + _ASM_EXTABLE_UA(8b,.L_fixup_4x8b_copy)
> + _ASM_EXTABLE_UA(9b,.L_fixup_4x8b_copy)
> + _ASM_EXTABLE_UA(10b,.L_fixup_4x8b_copy)
> + _ASM_EXTABLE_UA(11b,.L_fixup_4x8b_copy)
> + _ASM_EXTABLE_UA(12b,.L_fixup_4x8b_copy)
> + _ASM_EXTABLE_UA(13b,.L_fixup_4x8b_copy)
> + _ASM_EXTABLE_UA(14b,.L_fixup_4x8b_copy)
> + _ASM_EXTABLE_UA(15b,.L_fixup_4x8b_copy)
> + _ASM_EXTABLE_UA(16b,.L_fixup_4x8b_copy)
> + _ASM_EXTABLE_UA(20b,.L_fixup_8b_copy)
> + _ASM_EXTABLE_UA(21b,.L_fixup_8b_copy)
> + _ASM_EXTABLE_UA(30b,.L_fixup_4b_copy)
> + _ASM_EXTABLE_UA(31b,.L_fixup_4b_copy)
> + _ASM_EXTABLE_UA(40b,.L_fixup_1b_copy)
> + _ASM_EXTABLE_UA(41b,.L_fixup_1b_copy)
> ENDPROC(__copy_user_nocache)
> EXPORT_SYMBOL(__copy_user_nocache)
> diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S
> index 45a53dfe1859..969af904c74b 100644
> --- a/arch/x86/lib/csum-copy_64.S
> +++ b/arch/x86/lib/csum-copy_64.S
> @@ -31,17 +31,17 @@
>
> .macro source
> 10:
> - _ASM_EXTABLE(10b, .Lbad_source)
> + _ASM_EXTABLE_UA(10b, .Lbad_source)
> .endm
>
> .macro dest
> 20:
> - _ASM_EXTABLE(20b, .Lbad_dest)
> + _ASM_EXTABLE_UA(20b, .Lbad_dest)
> .endm
>
> .macro ignore L=.Lignore
> 30:
> - _ASM_EXTABLE(30b, \L)
> + _ASM_EXTABLE_UA(30b, \L)
> .endm
>
>
> diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
> index 49b167f73215..884fe795d9d6 100644
> --- a/arch/x86/lib/getuser.S
> +++ b/arch/x86/lib/getuser.S
> @@ -132,12 +132,12 @@ bad_get_user_8:
> END(bad_get_user_8)
> #endif
>
> - _ASM_EXTABLE(1b,bad_get_user)
> - _ASM_EXTABLE(2b,bad_get_user)
> - _ASM_EXTABLE(3b,bad_get_user)
> + _ASM_EXTABLE_UA(1b,bad_get_user)
> + _ASM_EXTABLE_UA(2b,bad_get_user)
> + _ASM_EXTABLE_UA(3b,bad_get_user)
> #ifdef CONFIG_X86_64
> - _ASM_EXTABLE(4b,bad_get_user)
> + _ASM_EXTABLE_UA(4b,bad_get_user)
> #else
> - _ASM_EXTABLE(4b,bad_get_user_8)
> - _ASM_EXTABLE(5b,bad_get_user_8)
> + _ASM_EXTABLE_UA(4b,bad_get_user_8)
> + _ASM_EXTABLE_UA(5b,bad_get_user_8)
> #endif
> diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
> index 96dce5fe2a35..cdcf6143d953 100644
> --- a/arch/x86/lib/putuser.S
> +++ b/arch/x86/lib/putuser.S
> @@ -94,10 +94,10 @@ bad_put_user:
> EXIT
> END(bad_put_user)
>
> - _ASM_EXTABLE(1b,bad_put_user)
> - _ASM_EXTABLE(2b,bad_put_user)
> - _ASM_EXTABLE(3b,bad_put_user)
> - _ASM_EXTABLE(4b,bad_put_user)
> + _ASM_EXTABLE_UA(1b,bad_put_user)
> + _ASM_EXTABLE_UA(2b,bad_put_user)
> + _ASM_EXTABLE_UA(3b,bad_put_user)
> + _ASM_EXTABLE_UA(4b,bad_put_user)
> #ifdef CONFIG_X86_32
> - _ASM_EXTABLE(5b,bad_put_user)
> + _ASM_EXTABLE_UA(5b,bad_put_user)
> #endif
> diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
> index 7add8ba06887..92eb5956f2f3 100644
> --- a/arch/x86/lib/usercopy_32.c
> +++ b/arch/x86/lib/usercopy_32.c
> @@ -47,8 +47,8 @@ do { \
> "3: lea 0(%2,%0,4),%0\n" \
> " jmp 2b\n" \
> ".previous\n" \
> - _ASM_EXTABLE(0b,3b) \
> - _ASM_EXTABLE(1b,2b) \
> + _ASM_EXTABLE_UA(0b,3b) \
> + _ASM_EXTABLE_UA(1b,2b) \
> : "=&c"(size), "=&D" (__d0) \
> : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \
> } while (0)
> @@ -153,44 +153,44 @@ __copy_user_intel(void __user *to, const void *from, unsigned long size)
> "101: lea 0(%%eax,%0,4),%0\n"
> " jmp 100b\n"
> ".previous\n"
> - _ASM_EXTABLE(1b,100b)
> - _ASM_EXTABLE(2b,100b)
> - _ASM_EXTABLE(3b,100b)
> - _ASM_EXTABLE(4b,100b)
> - _ASM_EXTABLE(5b,100b)
> - _ASM_EXTABLE(6b,100b)
> - _ASM_EXTABLE(7b,100b)
> - _ASM_EXTABLE(8b,100b)
> - _ASM_EXTABLE(9b,100b)
> - _ASM_EXTABLE(10b,100b)
> - _ASM_EXTABLE(11b,100b)
> - _ASM_EXTABLE(12b,100b)
> - _ASM_EXTABLE(13b,100b)
> - _ASM_EXTABLE(14b,100b)
> - _ASM_EXTABLE(15b,100b)
> - _ASM_EXTABLE(16b,100b)
> - _ASM_EXTABLE(17b,100b)
> - _ASM_EXTABLE(18b,100b)
> - _ASM_EXTABLE(19b,100b)
> - _ASM_EXTABLE(20b,100b)
> - _ASM_EXTABLE(21b,100b)
> - _ASM_EXTABLE(22b,100b)
> - _ASM_EXTABLE(23b,100b)
> - _ASM_EXTABLE(24b,100b)
> - _ASM_EXTABLE(25b,100b)
> - _ASM_EXTABLE(26b,100b)
> - _ASM_EXTABLE(27b,100b)
> - _ASM_EXTABLE(28b,100b)
> - _ASM_EXTABLE(29b,100b)
> - _ASM_EXTABLE(30b,100b)
> - _ASM_EXTABLE(31b,100b)
> - _ASM_EXTABLE(32b,100b)
> - _ASM_EXTABLE(33b,100b)
> - _ASM_EXTABLE(34b,100b)
> - _ASM_EXTABLE(35b,100b)
> - _ASM_EXTABLE(36b,100b)
> - _ASM_EXTABLE(37b,100b)
> - _ASM_EXTABLE(99b,101b)
> + _ASM_EXTABLE_UA(1b,100b)
> + _ASM_EXTABLE_UA(2b,100b)
> + _ASM_EXTABLE_UA(3b,100b)
> + _ASM_EXTABLE_UA(4b,100b)
> + _ASM_EXTABLE_UA(5b,100b)
> + _ASM_EXTABLE_UA(6b,100b)
> + _ASM_EXTABLE_UA(7b,100b)
> + _ASM_EXTABLE_UA(8b,100b)
> + _ASM_EXTABLE_UA(9b,100b)
> + _ASM_EXTABLE_UA(10b,100b)
> + _ASM_EXTABLE_UA(11b,100b)
> + _ASM_EXTABLE_UA(12b,100b)
> + _ASM_EXTABLE_UA(13b,100b)
> + _ASM_EXTABLE_UA(14b,100b)
> + _ASM_EXTABLE_UA(15b,100b)
> + _ASM_EXTABLE_UA(16b,100b)
> + _ASM_EXTABLE_UA(17b,100b)
> + _ASM_EXTABLE_UA(18b,100b)
> + _ASM_EXTABLE_UA(19b,100b)
> + _ASM_EXTABLE_UA(20b,100b)
> + _ASM_EXTABLE_UA(21b,100b)
> + _ASM_EXTABLE_UA(22b,100b)
> + _ASM_EXTABLE_UA(23b,100b)
> + _ASM_EXTABLE_UA(24b,100b)
> + _ASM_EXTABLE_UA(25b,100b)
> + _ASM_EXTABLE_UA(26b,100b)
> + _ASM_EXTABLE_UA(27b,100b)
> + _ASM_EXTABLE_UA(28b,100b)
> + _ASM_EXTABLE_UA(29b,100b)
> + _ASM_EXTABLE_UA(30b,100b)
> + _ASM_EXTABLE_UA(31b,100b)
> + _ASM_EXTABLE_UA(32b,100b)
> + _ASM_EXTABLE_UA(33b,100b)
> + _ASM_EXTABLE_UA(34b,100b)
> + _ASM_EXTABLE_UA(35b,100b)
> + _ASM_EXTABLE_UA(36b,100b)
> + _ASM_EXTABLE_UA(37b,100b)
> + _ASM_EXTABLE_UA(99b,101b)
> : "=&c"(size), "=&D" (d0), "=&S" (d1)
> : "1"(to), "2"(from), "0"(size)
> : "eax", "edx", "memory");
> @@ -259,26 +259,26 @@ static unsigned long __copy_user_intel_nocache(void *to,
> "9: lea 0(%%eax,%0,4),%0\n"
> "16: jmp 8b\n"
> ".previous\n"
> - _ASM_EXTABLE(0b,16b)
> - _ASM_EXTABLE(1b,16b)
> - _ASM_EXTABLE(2b,16b)
> - _ASM_EXTABLE(21b,16b)
> - _ASM_EXTABLE(3b,16b)
> - _ASM_EXTABLE(31b,16b)
> - _ASM_EXTABLE(4b,16b)
> - _ASM_EXTABLE(41b,16b)
> - _ASM_EXTABLE(10b,16b)
> - _ASM_EXTABLE(51b,16b)
> - _ASM_EXTABLE(11b,16b)
> - _ASM_EXTABLE(61b,16b)
> - _ASM_EXTABLE(12b,16b)
> - _ASM_EXTABLE(71b,16b)
> - _ASM_EXTABLE(13b,16b)
> - _ASM_EXTABLE(81b,16b)
> - _ASM_EXTABLE(14b,16b)
> - _ASM_EXTABLE(91b,16b)
> - _ASM_EXTABLE(6b,9b)
> - _ASM_EXTABLE(7b,16b)
> + _ASM_EXTABLE_UA(0b,16b)
> + _ASM_EXTABLE_UA(1b,16b)
> + _ASM_EXTABLE_UA(2b,16b)
> + _ASM_EXTABLE_UA(21b,16b)
> + _ASM_EXTABLE_UA(3b,16b)
> + _ASM_EXTABLE_UA(31b,16b)
> + _ASM_EXTABLE_UA(4b,16b)
> + _ASM_EXTABLE_UA(41b,16b)
> + _ASM_EXTABLE_UA(10b,16b)
> + _ASM_EXTABLE_UA(51b,16b)
> + _ASM_EXTABLE_UA(11b,16b)
> + _ASM_EXTABLE_UA(61b,16b)
> + _ASM_EXTABLE_UA(12b,16b)
> + _ASM_EXTABLE_UA(71b,16b)
> + _ASM_EXTABLE_UA(13b,16b)
> + _ASM_EXTABLE_UA(81b,16b)
> + _ASM_EXTABLE_UA(14b,16b)
> + _ASM_EXTABLE_UA(91b,16b)
> + _ASM_EXTABLE_UA(6b,9b)
> + _ASM_EXTABLE_UA(7b,16b)
> : "=&c"(size), "=&D" (d0), "=&S" (d1)
> : "1"(to), "2"(from), "0"(size)
> : "eax", "edx", "memory");
> @@ -321,9 +321,9 @@ do { \
> "3: lea 0(%3,%0,4),%0\n" \
> " jmp 2b\n" \
> ".previous\n" \
> - _ASM_EXTABLE(4b,5b) \
> - _ASM_EXTABLE(0b,3b) \
> - _ASM_EXTABLE(1b,2b) \
> + _ASM_EXTABLE_UA(4b,5b) \
> + _ASM_EXTABLE_UA(0b,3b) \
> + _ASM_EXTABLE_UA(1b,2b) \
> : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \
> : "3"(size), "0"(size), "1"(to), "2"(from) \
> : "memory"); \
> diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
> index 9c5606d88f61..c55b1f590cc4 100644
> --- a/arch/x86/lib/usercopy_64.c
> +++ b/arch/x86/lib/usercopy_64.c
> @@ -37,8 +37,8 @@ unsigned long __clear_user(void __user *addr, unsigned long size)
> "3: lea 0(%[size1],%[size8],8),%[size8]\n"
> " jmp 2b\n"
> ".previous\n"
> - _ASM_EXTABLE(0b,3b)
> - _ASM_EXTABLE(1b,2b)
> + _ASM_EXTABLE_UA(0b,3b)
> + _ASM_EXTABLE_UA(1b,2b)
> : [size8] "=&c"(size), [dst] "=&D" (__d0)
> : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr));
> clac();
> diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
> index 45f5d6cf65ae..3fd55a03892d 100644
> --- a/arch/x86/mm/extable.c
> +++ b/arch/x86/mm/extable.c
> @@ -8,7 +8,7 @@
> #include <asm/kdebug.h>
>
> typedef bool (*ex_handler_t)(const struct exception_table_entry *,
> - struct pt_regs *, int);
> + struct pt_regs *, int, unsigned long);
>
> static inline unsigned long
> ex_fixup_addr(const struct exception_table_entry *x)
> @@ -22,7 +22,8 @@ ex_fixup_handler(const struct exception_table_entry *x)
> }
>
> __visible bool ex_handler_default(const struct exception_table_entry *fixup,
> - struct pt_regs *regs, int trapnr)
> + struct pt_regs *regs, int trapnr,
> + unsigned long fault_addr)
> {
> regs->ip = ex_fixup_addr(fixup);
> return true;
> @@ -30,7 +31,8 @@ __visible bool ex_handler_default(const struct exception_table_entry *fixup,
> EXPORT_SYMBOL(ex_handler_default);
>
> __visible bool ex_handler_fault(const struct exception_table_entry *fixup,
> - struct pt_regs *regs, int trapnr)
> + struct pt_regs *regs, int trapnr,
> + unsigned long fault_addr)
> {
> regs->ip = ex_fixup_addr(fixup);
> regs->ax = trapnr;
> @@ -43,7 +45,8 @@ EXPORT_SYMBOL_GPL(ex_handler_fault);
> * result of a refcount inc/dec/add/sub.
> */
> __visible bool ex_handler_refcount(const struct exception_table_entry *fixup,
> - struct pt_regs *regs, int trapnr)
> + struct pt_regs *regs, int trapnr,
> + unsigned long fault_addr)
> {
> /* First unconditionally saturate the refcount. */
> *(int *)regs->cx = INT_MIN / 2;
> @@ -96,7 +99,8 @@ EXPORT_SYMBOL(ex_handler_refcount);
> * out all the FPU registers) if we can't restore from the task's FPU state.
> */
> __visible bool ex_handler_fprestore(const struct exception_table_entry *fixup,
> - struct pt_regs *regs, int trapnr)
> + struct pt_regs *regs, int trapnr,
> + unsigned long fault_addr)
> {
> regs->ip = ex_fixup_addr(fixup);
>
> @@ -108,18 +112,53 @@ __visible bool ex_handler_fprestore(const struct exception_table_entry *fixup,
> }
> EXPORT_SYMBOL_GPL(ex_handler_fprestore);
>
> +static void bogus_uaccess_check(struct pt_regs *regs, int trapnr,
> + unsigned long fault_addr)
> +{
> + if (trapnr != X86_TRAP_PF || fault_addr < TASK_SIZE_MAX)
> + return;
> + /*
> + * This is a pagefault in kernel space, on a kernel address, in a
> + * usercopy function. This can e.g. be caused by improper use of helpers
> + * like __put_user and by improper attempts to access userspace
> + * addresses in KERNEL_DS regions. The one legitimate exception are
> + * probe_kernel_{read,write}(), which can be invoked from places like
> + * kgdb, /dev/mem (for reading) and privileged BPF code (for reading).
> + * The probe_kernel_*() functions set the kernel_uaccess_faults_ok flag
> + * to tell us that faulting on kernel addresses in a userspace accessor
> + * is fine.
> + */
> + if (current->kernel_uaccess_faults_ok)
> + return;
> + WARN(1, "pagefault on kernel address 0x%lx in non-whitelisted uaccess",
> + fault_addr);
> +}
> +
> +__visible bool ex_handler_uaccess(const struct exception_table_entry *fixup,
> + struct pt_regs *regs, int trapnr,
> + unsigned long fault_addr)
> +{
> + bogus_uaccess_check(regs, trapnr, fault_addr);
> + regs->ip = ex_fixup_addr(fixup);
> + return true;
> +}
> +EXPORT_SYMBOL(ex_handler_uaccess);
> +
> __visible bool ex_handler_ext(const struct exception_table_entry *fixup,
> - struct pt_regs *regs, int trapnr)
> + struct pt_regs *regs, int trapnr,
> + unsigned long fault_addr)
> {
> /* Special hack for uaccess_err */
> current->thread.uaccess_err = 1;
> + bogus_uaccess_check(regs, trapnr, fault_addr);
> regs->ip = ex_fixup_addr(fixup);
> return true;
> }
> EXPORT_SYMBOL(ex_handler_ext);
>
> __visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
> - struct pt_regs *regs, int trapnr)
> + struct pt_regs *regs, int trapnr,
> + unsigned long fault_addr)
> {
> if (pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pF)\n",
> (unsigned int)regs->cx, regs->ip, (void *)regs->ip))
> @@ -134,7 +173,8 @@ __visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup
> EXPORT_SYMBOL(ex_handler_rdmsr_unsafe);
>
> __visible bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup,
> - struct pt_regs *regs, int trapnr)
> + struct pt_regs *regs, int trapnr,
> + unsigned long fault_addr)
> {
> if (pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pF)\n",
> (unsigned int)regs->cx, (unsigned int)regs->dx,
> @@ -148,12 +188,13 @@ __visible bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup
> EXPORT_SYMBOL(ex_handler_wrmsr_unsafe);
>
> __visible bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
> - struct pt_regs *regs, int trapnr)
> + struct pt_regs *regs, int trapnr,
> + unsigned long fault_addr)
> {
> if (static_cpu_has(X86_BUG_NULL_SEG))
> asm volatile ("mov %0, %%fs" : : "rm" (__USER_DS));
> asm volatile ("mov %0, %%fs" : : "rm" (0));
> - return ex_handler_default(fixup, regs, trapnr);
> + return ex_handler_default(fixup, regs, trapnr, fault_addr);
> }
> EXPORT_SYMBOL(ex_handler_clear_fs);
>
> @@ -170,7 +211,7 @@ __visible bool ex_has_fault_handler(unsigned long ip)
> return handler == ex_handler_fault;
> }
>
> -int fixup_exception(struct pt_regs *regs, int trapnr)
> +int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long fault_addr)
> {
> const struct exception_table_entry *e;
> ex_handler_t handler;
> @@ -194,7 +235,7 @@ int fixup_exception(struct pt_regs *regs, int trapnr)
> return 0;
>
> handler = ex_fixup_handler(e);
> - return handler(e, regs, trapnr);
> + return handler(e, regs, trapnr, fault_addr);
> }
>
> extern unsigned int early_recursion_flag;
> @@ -232,7 +273,7 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr)
> * Keep in mind that not all vectors actually get here. Early
> * fage faults, for example, are special.
> */
> - if (fixup_exception(regs, trapnr))
> + if (fixup_exception(regs, trapnr, 0))
> return;
>
> if (fixup_bug(regs, trapnr))
> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
> index 2aafa6ab6103..96e3e5cf2cfc 100644
> --- a/arch/x86/mm/fault.c
> +++ b/arch/x86/mm/fault.c
> @@ -710,7 +710,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
> int sig;
>
> /* Are we prepared to handle this kernel fault? */
> - if (fixup_exception(regs, X86_TRAP_PF)) {
> + if (fixup_exception(regs, X86_TRAP_PF, address)) {
> /*
> * Any interrupt that takes a fault gets the fixup. This makes
> * the below recursive fault logic only apply to a faults from
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 43731fe51c97..b50598761ed4 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -734,6 +734,8 @@ struct task_struct {
> /* disallow userland-initiated cgroup migration */
> unsigned no_cgroup_migration:1;
> #endif
> + /* usercopy functions may fault on kernel addresses */
> + unsigned int kernel_uaccess_faults_ok:1;
>
> unsigned long atomic_flags; /* Flags requiring atomic access. */
>
> diff --git a/mm/maccess.c b/mm/maccess.c
> index ec00be51a24f..e066aa8482af 100644
> --- a/mm/maccess.c
> +++ b/mm/maccess.c
> @@ -30,8 +30,10 @@ long __probe_kernel_read(void *dst, const void *src, size_t size)
>
> set_fs(KERNEL_DS);
> pagefault_disable();
> + current->kernel_uaccess_faults_ok = 1;
> ret = __copy_from_user_inatomic(dst,
> (__force const void __user *)src, size);
> + current->kernel_uaccess_faults_ok = 0;
> pagefault_enable();
> set_fs(old_fs);
>
> @@ -58,7 +60,9 @@ long __probe_kernel_write(void *dst, const void *src, size_t size)
>
> set_fs(KERNEL_DS);
> pagefault_disable();
> + current->kernel_uaccess_faults_ok = 1;
> ret = __copy_to_user_inatomic((__force void __user *)dst, src, size);
> + current->kernel_uaccess_faults_ok = 0;
> pagefault_enable();
> set_fs(old_fs);
>
> @@ -94,11 +98,13 @@ long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count)
>
> set_fs(KERNEL_DS);
> pagefault_disable();
> + current->kernel_uaccess_faults_ok = 1;
>
> do {
> ret = __get_user(*dst++, (const char __user __force *)src++);
> } while (dst[-1] && ret == 0 && src - unsafe_addr < count);
>
> + current->kernel_uaccess_faults_ok = 0;
> dst[-1] = '\0';
> pagefault_enable();
> set_fs(old_fs);
> --
> 2.18.0.597.ga71716f1ad-goog
>
> On Aug 7, 2018, at 4:04 AM, Dmitry Vyukov <[email protected]> wrote:
>
>> On Tue, Aug 7, 2018 at 3:22 AM, Jann Horn <[email protected]> wrote:
>> There have been multiple kernel vulnerabilities that permitted userspace to
>> pass completely unchecked pointers through to userspace accessors:
>>
>> - the waitid() bug - commit 96ca579a1ecc ("waitid(): Add missing
>> access_ok() checks")
>> - the sg/bsg read/write APIs
>> - the infiniband read/write APIs
>>
>> These don't happen all that often, but when they do happen, it is hard to
>> test for them properly; and it is probably also hard to discover them with
>> fuzzing. Even when an unmapped kernel address is supplied to such buggy
>> code, it just returns -EFAULT instead of doing a proper BUG() or at least
>> WARN().
>>
>> This patch attempts to make such misbehaving code a bit more visible by
>> WARN()ing in the pagefault handler code when a userspace accessor causes
>> #PF on a kernel address and the current context isn't whitelisted.
>
> This is not triggerable unless there is a kernel bug, right? I mean
> this won't be a DoS vector? And any case is something to report to
> kernel developers?
Yes. I expect it to help fuzzers, since it will make a uaccess at a bad address much more likely to oops.
My old series found one bug when the automated fuzzers fuzzed it. That bug is fixed now.
On Tue, Aug 7, 2018 at 4:55 AM Andy Lutomirski <[email protected]> wrote:
> > On Aug 6, 2018, at 6:22 PM, Jann Horn <[email protected]> wrote:
> >
> > There have been multiple kernel vulnerabilities that permitted userspace to
> > pass completely unchecked pointers through to userspace accessors:
> >
> > - the waitid() bug - commit 96ca579a1ecc ("waitid(): Add missing
> > access_ok() checks")
> > - the sg/bsg read/write APIs
> > - the infiniband read/write APIs
> >
> > These don't happen all that often, but when they do happen, it is hard to
> > test for them properly; and it is probably also hard to discover them with
> > fuzzing. Even when an unmapped kernel address is supplied to such buggy
> > code, it just returns -EFAULT instead of doing a proper BUG() or at least
> > WARN().
> >
> > This patch attempts to make such misbehaving code a bit more visible by
> > WARN()ing in the pagefault handler code when a userspace accessor causes
> > #PF on a kernel address and the current context isn't whitelisted.
>
> I like this a lot, and, in fact, I once wrote a patch to do something similar. It was before the fancy extable code, though, so it was a mess. Here are some thoughts:
>
> - It should be three patches. One patch to add the _UA annotations, one to improve the info passes to the handlers, and one to change behavior.
>
> - You should pass the vector, the error code, and the address to the handler.
>
> - The uaccess handler should IMO WARN if the vector is anything other than #PF (which mainly means warning if it’s #GP). I think it should pr_emerg() and return false if the vector is #PF and the address is too high.
What about #MC? do_machine_check() sometimes invokes fixup handlers.
It looks like fixup_exception() is basically reached for anything that
can't be restarted (either MCG_STATUS_RIPV isn't set or the worst
severity is MCE_AR_SEVERITY), but doesn't reach MCE_PANIC_SEVERITY? In
particular for "Action required: data load in error recoverable area
of kernel", if I'm reading the code correctly. It seems like the code
is intentionally preventing memory errors during user access from
being treated as kernel memory errors? So perhaps #MC in user access
should not WARN()?
> - Arguably most non-uaccess fixups should at least warn for anything other than #GP and #UD.
> On Aug 7, 2018, at 2:17 PM, Jann Horn <[email protected]> wrote:
>
> On Tue, Aug 7, 2018 at 4:55 AM Andy Lutomirski <[email protected]> wrote:
>>> On Aug 6, 2018, at 6:22 PM, Jann Horn <[email protected]> wrote:
>>>
>>> There have been multiple kernel vulnerabilities that permitted userspace to
>>> pass completely unchecked pointers through to userspace accessors:
>>>
>>> - the waitid() bug - commit 96ca579a1ecc ("waitid(): Add missing
>>> access_ok() checks")
>>> - the sg/bsg read/write APIs
>>> - the infiniband read/write APIs
>>>
>>> These don't happen all that often, but when they do happen, it is hard to
>>> test for them properly; and it is probably also hard to discover them with
>>> fuzzing. Even when an unmapped kernel address is supplied to such buggy
>>> code, it just returns -EFAULT instead of doing a proper BUG() or at least
>>> WARN().
>>>
>>> This patch attempts to make such misbehaving code a bit more visible by
>>> WARN()ing in the pagefault handler code when a userspace accessor causes
>>> #PF on a kernel address and the current context isn't whitelisted.
>>
>> I like this a lot, and, in fact, I once wrote a patch to do something similar. It was before the fancy extable code, though, so it was a mess. Here are some thoughts:
>>
>> - It should be three patches. One patch to add the _UA annotations, one to improve the info passes to the handlers, and one to change behavior.
>>
>> - You should pass the vector, the error code, and the address to the handler.
>>
>> - The uaccess handler should IMO WARN if the vector is anything other than #PF (which mainly means warning if it’s #GP). I think it should pr_emerg() and return false if the vector is #PF and the address is too high.
>
> What about #MC? do_machine_check() sometimes invokes fixup handlers.
> It looks like fixup_exception() is basically reached for anything that
> can't be restarted (either MCG_STATUS_RIPV isn't set or the worst
> severity is MCE_AR_SEVERITY), but doesn't reach MCE_PANIC_SEVERITY? In
> particular for "Action required: data load in error recoverable area
> of kernel", if I'm reading the code correctly. It seems like the code
> is intentionally preventing memory errors during user access from
> being treated as kernel memory errors? So perhaps #MC in user access
> should not WARN()?
Agreed.
>
>> - Arguably most non-uaccess fixups should at least warn for anything other than #GP and #UD.
On Tue, Aug 7, 2018 at 4:55 AM Andy Lutomirski <[email protected]> wrote:
> > On Aug 6, 2018, at 6:22 PM, Jann Horn <[email protected]> wrote:
> > There have been multiple kernel vulnerabilities that permitted userspace to
> > pass completely unchecked pointers through to userspace accessors:
> >
> > - the waitid() bug - commit 96ca579a1ecc ("waitid(): Add missing
> > access_ok() checks")
> > - the sg/bsg read/write APIs
> > - the infiniband read/write APIs
> >
> > These don't happen all that often, but when they do happen, it is hard to
> > test for them properly; and it is probably also hard to discover them with
> > fuzzing. Even when an unmapped kernel address is supplied to such buggy
> > code, it just returns -EFAULT instead of doing a proper BUG() or at least
> > WARN().
> >
> > This patch attempts to make such misbehaving code a bit more visible by
> > WARN()ing in the pagefault handler code when a userspace accessor causes
> > #PF on a kernel address and the current context isn't whitelisted.
>
> I like this a lot, and, in fact, I once wrote a patch to do something similar. It was before the fancy extable code, though, so it was a mess. Here are some thoughts:
>
> - It should be three patches. One patch to add the _UA annotations, one to improve the info passes to the handlers, and one to change behavior.
>
> - You should pass the vector, the error code, and the address to the handler.
I'm polishing the patch a bit, and I've noticed that to plumb the
error code and address through properly, I might need significantly
more code churn because of kprobes - I want to make sure I'm not going
down the completely wrong path here.
I'm extending fixup_exception() to take two extra args "unsigned long
error_code, unsigned long fault_addr". Most callers of
fixup_exception() are fairly straightforward, but
kprobe_fault_handler() has a dozen callchains from different exception
handlers, and most of them are coming via notify_die(). (My RFC patch
cheated by just feeding zeroes into fixup_exception() from
kprobe_fault_handler().) Also, annoyingly, for !CONFIG_KPROBES,
kprobe_fault_handler() is defined in include/linux/kprobes.h with a
single prototype across architectures.
Currently, for example, when do_general_protection() handles a kernel
fault, it first directly calls into fixup_exception(); and then, if
this is happening inside a kprobe, via
notify_die()->atomic_notifier_call_chain()->kprobe_exceptions_notify()->kprobe_fault_handler()->fixup_exception(),
it can end up calling fixup handlers a second time.
I think there's also some inconsistency between #PF and #GP in the
ordering of error handling:
#PF handling:
__do_page_fault
kprobes_fault
kprobe_fault_handler
->fault_handler # first: kprobe fault handler
fixup_exception # second: kprobe's fixup call
bad_area_nosemaphore
__bad_area_nosemaphore
no_context
fixup_exception # third: normal fixup call
#GP handling:
do_general_protection
fixup_exception # first: normal fixup call
notify_die
atomic_notifier_call_chain
kprobe_exceptions_notify
kprobe_fault_handler
->fault_handler # second: kprobe fault handler
fixup_exception # third: kprobe's fixup call
Do you think I should actually plumb the error code and fault address
all the way through notify_die() and the kprobe handling fault?
Should I supply some "I don't want to trigger uaccess fault handlers"
flag when coming from the kprobe code?
Should the kprobe code not call into fixup_exception() at all (and if
so, should I change that somehow)?
On Wed, Aug 22, 2018 at 4:53 PM, Jann Horn <[email protected]> wrote:
> On Tue, Aug 7, 2018 at 4:55 AM Andy Lutomirski <[email protected]> wrote:
>> > On Aug 6, 2018, at 6:22 PM, Jann Horn <[email protected]> wrote:
>> > There have been multiple kernel vulnerabilities that permitted userspace to
>> > pass completely unchecked pointers through to userspace accessors:
>> >
>> > - the waitid() bug - commit 96ca579a1ecc ("waitid(): Add missing
>> > access_ok() checks")
>> > - the sg/bsg read/write APIs
>> > - the infiniband read/write APIs
>> >
>> > These don't happen all that often, but when they do happen, it is hard to
>> > test for them properly; and it is probably also hard to discover them with
>> > fuzzing. Even when an unmapped kernel address is supplied to such buggy
>> > code, it just returns -EFAULT instead of doing a proper BUG() or at least
>> > WARN().
>> >
>> > This patch attempts to make such misbehaving code a bit more visible by
>> > WARN()ing in the pagefault handler code when a userspace accessor causes
>> > #PF on a kernel address and the current context isn't whitelisted.
>>
>> I like this a lot, and, in fact, I once wrote a patch to do something similar. It was before the fancy extable code, though, so it was a mess. Here are some thoughts:
>>
>> - It should be three patches. One patch to add the _UA annotations, one to improve the info passes to the handlers, and one to change behavior.
>>
>> - You should pass the vector, the error code, and the address to the handler.
>
> I'm polishing the patch a bit, and I've noticed that to plumb the
> error code and address through properly, I might need significantly
> more code churn because of kprobes - I want to make sure I'm not going
> down the completely wrong path here.
>
> I'm extending fixup_exception() to take two extra args "unsigned long
> error_code, unsigned long fault_addr". Most callers of
> fixup_exception() are fairly straightforward, but
> kprobe_fault_handler() has a dozen callchains from different exception
> handlers, and most of them are coming via notify_die().
KILL IT WITH FIRE!!!!!!!!
More seriously, kill kprobe_exceptions_notify() and just fold the
contents into do_general_protection(). This notifier chain crap is
incomprehensible. I would love to see notify_die() removed entirely,
and crap like this is just more reason to want it gone.
> I think there's also some inconsistency between #PF and #GP in the
> ordering of error handling:
It's probably a bug. It's also probably irrelevant, but maybe not.
On Thu, Aug 23, 2018 at 2:28 AM Andy Lutomirski <[email protected]> wrote:
>
> On Wed, Aug 22, 2018 at 4:53 PM, Jann Horn <[email protected]> wrote:
> > On Tue, Aug 7, 2018 at 4:55 AM Andy Lutomirski <[email protected]> wrote:
> >> > On Aug 6, 2018, at 6:22 PM, Jann Horn <[email protected]> wrote:
> >> > There have been multiple kernel vulnerabilities that permitted userspace to
> >> > pass completely unchecked pointers through to userspace accessors:
> >> >
> >> > - the waitid() bug - commit 96ca579a1ecc ("waitid(): Add missing
> >> > access_ok() checks")
> >> > - the sg/bsg read/write APIs
> >> > - the infiniband read/write APIs
> >> >
> >> > These don't happen all that often, but when they do happen, it is hard to
> >> > test for them properly; and it is probably also hard to discover them with
> >> > fuzzing. Even when an unmapped kernel address is supplied to such buggy
> >> > code, it just returns -EFAULT instead of doing a proper BUG() or at least
> >> > WARN().
> >> >
> >> > This patch attempts to make such misbehaving code a bit more visible by
> >> > WARN()ing in the pagefault handler code when a userspace accessor causes
> >> > #PF on a kernel address and the current context isn't whitelisted.
> >>
> >> I like this a lot, and, in fact, I once wrote a patch to do something similar. It was before the fancy extable code, though, so it was a mess. Here are some thoughts:
> >>
> >> - It should be three patches. One patch to add the _UA annotations, one to improve the info passes to the handlers, and one to change behavior.
> >>
> >> - You should pass the vector, the error code, and the address to the handler.
> >
> > I'm polishing the patch a bit, and I've noticed that to plumb the
> > error code and address through properly, I might need significantly
> > more code churn because of kprobes - I want to make sure I'm not going
> > down the completely wrong path here.
> >
> > I'm extending fixup_exception() to take two extra args "unsigned long
> > error_code, unsigned long fault_addr". Most callers of
> > fixup_exception() are fairly straightforward, but
> > kprobe_fault_handler() has a dozen callchains from different exception
> > handlers, and most of them are coming via notify_die().
>
> KILL IT WITH FIRE!!!!!!!!
>
> More seriously, kill kprobe_exceptions_notify() and just fold the
> contents into do_general_protection(). This notifier chain crap is
> incomprehensible. I would love to see notify_die() removed entirely,
> and crap like this is just more reason to want it gone.
This isn't just do_general_protection() though, that's just one
example. As far as I can tell, similar stuff happens everywhere where
notify_die() is used - #DF, #BR, #BP, #MF and so on.
> > I think there's also some inconsistency between #PF and #GP in the
> > ordering of error handling:
>
> It's probably a bug. It's also probably irrelevant, but maybe not.
Depends on what people do in their ->fault_handler hooks, I guess.
Yeah, probably doesn't matter.
On Wed, Aug 22, 2018 at 5:55 PM, Jann Horn <[email protected]> wrote:
> On Thu, Aug 23, 2018 at 2:28 AM Andy Lutomirski <[email protected]> wrote:
>>
>> On Wed, Aug 22, 2018 at 4:53 PM, Jann Horn <[email protected]> wrote:
>> > On Tue, Aug 7, 2018 at 4:55 AM Andy Lutomirski <[email protected]> wrote:
>> >> > On Aug 6, 2018, at 6:22 PM, Jann Horn <[email protected]> wrote:
>> >> > There have been multiple kernel vulnerabilities that permitted userspace to
>> >> > pass completely unchecked pointers through to userspace accessors:
>> >> >
>> >> > - the waitid() bug - commit 96ca579a1ecc ("waitid(): Add missing
>> >> > access_ok() checks")
>> >> > - the sg/bsg read/write APIs
>> >> > - the infiniband read/write APIs
>> >> >
>> >> > These don't happen all that often, but when they do happen, it is hard to
>> >> > test for them properly; and it is probably also hard to discover them with
>> >> > fuzzing. Even when an unmapped kernel address is supplied to such buggy
>> >> > code, it just returns -EFAULT instead of doing a proper BUG() or at least
>> >> > WARN().
>> >> >
>> >> > This patch attempts to make such misbehaving code a bit more visible by
>> >> > WARN()ing in the pagefault handler code when a userspace accessor causes
>> >> > #PF on a kernel address and the current context isn't whitelisted.
>> >>
>> >> I like this a lot, and, in fact, I once wrote a patch to do something similar. It was before the fancy extable code, though, so it was a mess. Here are some thoughts:
>> >>
>> >> - It should be three patches. One patch to add the _UA annotations, one to improve the info passes to the handlers, and one to change behavior.
>> >>
>> >> - You should pass the vector, the error code, and the address to the handler.
>> >
>> > I'm polishing the patch a bit, and I've noticed that to plumb the
>> > error code and address through properly, I might need significantly
>> > more code churn because of kprobes - I want to make sure I'm not going
>> > down the completely wrong path here.
>> >
>> > I'm extending fixup_exception() to take two extra args "unsigned long
>> > error_code, unsigned long fault_addr". Most callers of
>> > fixup_exception() are fairly straightforward, but
>> > kprobe_fault_handler() has a dozen callchains from different exception
>> > handlers, and most of them are coming via notify_die().
>>
>> KILL IT WITH FIRE!!!!!!!!
>>
>> More seriously, kill kprobe_exceptions_notify() and just fold the
>> contents into do_general_protection(). This notifier chain crap is
>> incomprehensible. I would love to see notify_die() removed entirely,
>> and crap like this is just more reason to want it gone.
>
> This isn't just do_general_protection() though, that's just one
> example. As far as I can tell, similar stuff happens everywhere where
> notify_die() is used - #DF, #BR, #BP, #MF and so on.
True. But there aren't actually that many places in the kernel that
use die notifiers at all. Here's the complete list, excluding non-x86
arch-specific ones, annotated a bit:
arch/x86/kernel/kgdb.c: retval = register_die_notifier(&kgdb_notifier);
arch/x86/kernel/kgdb.c: unregister_die_notifier(&kgdb_notifier);
arch/x86/kernel/kgdb.c: unregister_die_notifier(&kgdb_notifier);
OK, maybe not totally crazy for kgdb.
arch/x86/mm/kasan_init_64.c: register_die_notifier(&kasan_die_notifier);
Should be in traps.c directly.
arch/x86/mm/kmmio.c: return register_die_notifier(&nb_die);
arch/x86/mm/kmmio.c: unregister_die_notifier(&nb_die);
Should probably be in traps.c directly.
drivers/bus/brcmstb_gisb.c: register_die_notifier(&gisb_die_notifier);
I don't know WTF this is, but it is certainly garbage if anyone ever
tries to build this on x86.
drivers/dma/sh/shdmac.c: int err =
register_die_notifier(&sh_dmae_nmi_notifier);
drivers/dma/sh/shdmac.c: unregister_die_notifier(&sh_dmae_nmi_notifier);
SH-specific, I think. Not really sure.
drivers/firmware/google/gsmi.c: register_die_notifier(&gsmi_die_notifier);
drivers/firmware/google/gsmi.c: unregister_die_notifier(&gsmi_die_notifier);
This is actually an *oops* notifier. That's totally reasonable, but
it should be a separate OOPS notification chain.
drivers/hv/vmbus_drv.c: register_die_notifier(&hyperv_die_block);
drivers/hv/vmbus_drv.c: unregister_die_notifier(&hyperv_die_block);
Appears to *want* to be an OOPS notifier, but it appears to be rather
buggy and to actually catch everything.
drivers/misc/sgi-xp/xpc_main.c:
(void)unregister_die_notifier(&xpc_die_notifier);
drivers/misc/sgi-xp/xpc_main.c: ret =
register_die_notifier(&xpc_die_notifier);
drivers/misc/sgi-xp/xpc_main.c:
(void)unregister_die_notifier(&xpc_die_notifier);
Haven't looked.
kernel/events/hw_breakpoint.c: return
register_die_notifier(&hw_breakpoint_exceptions_nb);
This is an utter abomination and needs to be killed. The #DB code is
gnarly enough without this particular indirection. I want to kill it
on x86. I have a big #DB cleanup series. Maybe I'll do it.
kernel/events/uprobes.c: return register_die_notifier(&uprobe_exception_nb);
For Pete's sake, the code should just call
uprobe_pre_sstep_notifier(regs)) and uprobe_post_sstep_notifier(regs)
directly.
kernel/kprobes.c: err = register_die_notifier(&kprobe_exceptions_nb);
This is yours :) And it is literally only hooking DIE_GPF. Just make
the body empty, add a comment like /* XXX: the core code expects this
function to exist */ and call the hander from do_general_protection().
kernel/trace/trace.c: register_die_notifier(&trace_die_notifier);
This is an OOPS notifier.
In any event, the particular offending kprobe notifier is literally
only checking for DIE_GPF. So it really can be folded into
do_general_protection(). Or you could add fault_address to die_args.