2019-03-07 11:53:39

by Peter Zijlstra

[permalink] [raw]
Subject: [PATCH 00/20] objtool: UACCESS validation v3

Teach objtool to validate the UACCESS (SMAP, PAN) rules which are currently
unenforced and (therefore obviously) violated.

UACCESS sections should be small; we want to limit the amount of code that can
touch userspace. Furthermore, UACCESS state isn't scheduled, this means that
anything that directly calls into the scheduler will result in random code
running with UACCESS enabled and possibly getting back into the UACCESS region
with UACCESS disabled and causing faults.

Forbid any CALL/RET while UACCESS is enabled; but provide a few exceptions.
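
As a rough sketch of what such a region is supposed to look like (purely
illustrative, the helper name is made up; compare the waitid() code quoted
further down the thread):

static int put_signo(struct siginfo __user *infop, int signo)
{
	if (!user_access_begin(infop, sizeof(*infop)))
		return -EFAULT;
	/* AC is set here: plain loads/stores only, no CALL/RET */
	unsafe_put_user(signo, &infop->si_signo, Efault);
	user_access_end();
	return 0;
Efault:
	user_access_end();
	return -EFAULT;
}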

This builds x86_64-allmodconfig clean, and I've only got a few randconfig
failures left (GCC-8) that I'm not quite understanding.

---
arch/x86/ia32/ia32_signal.c | 29 ++-
arch/x86/include/asm/asm.h | 24 --
arch/x86/include/asm/nospec-branch.h | 4 +-
arch/x86/include/asm/smap.h | 20 ++
arch/x86/include/asm/uaccess.h | 5 +-
arch/x86/include/asm/uaccess_64.h | 3 -
arch/x86/include/asm/xen/hypercall.h | 26 +-
arch/x86/kernel/signal.c | 2 +-
arch/x86/lib/copy_user_64.S | 48 ++++
arch/x86/lib/memcpy_64.S | 3 +-
arch/x86/lib/usercopy_64.c | 20 --
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 6 +-
include/linux/uaccess.h | 2 +
kernel/trace/trace_branch.c | 4 +
lib/Makefile | 1 +
lib/ubsan.c | 4 +
mm/kasan/Makefile | 3 +
mm/kasan/common.c | 10 +
mm/kasan/report.c | 3 +-
scripts/Makefile.build | 3 +
tools/objtool/Makefile | 2 +-
tools/objtool/arch.h | 8 +-
tools/objtool/arch/x86/decode.c | 26 +-
tools/objtool/builtin-check.c | 4 +-
tools/objtool/builtin.h | 2 +-
tools/objtool/check.c | 382 ++++++++++++++++++++++-------
tools/objtool/check.h | 4 +-
tools/objtool/elf.c | 15 +-
tools/objtool/elf.h | 3 +-
tools/objtool/special.c | 10 +-
tools/objtool/warn.h | 8 +
31 files changed, 511 insertions(+), 173 deletions(-)






2019-03-07 12:04:28

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 00/20] objtool: UACCESS validation v3

On Thu, Mar 07, 2019 at 12:45:11PM +0100, Peter Zijlstra wrote:
> I've only got a few randconfig
> failures left (GCC-8) that I'm not quite understanding.

Take for instance this one (.config attached); it has both
CONFIG_PROFILE_ALL_BRANCHES=y and CONFIG_TRACE_BRANCH_PROFILING=y
and it compiles:

(from kernel/exit.c)

SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
infop, int, options, struct rusage __user *, ru)
{
struct rusage r;
struct waitid_info info = {.status = 0};
long err = kernel_waitid(which, upid, &info, options, ru ? &r : NULL);
int signo = 0;

if (err > 0) {
signo = SIGCHLD;
err = 0;
if (ru && copy_to_user(ru, &r, sizeof(struct rusage)))
return -EFAULT;
}
if (!infop)
return err;

if (!user_access_begin(infop, sizeof(*infop)))
return -EFAULT;

unsafe_put_user(signo, &infop->si_signo, Efault);
unsafe_put_user(0, &infop->si_errno, Efault);
unsafe_put_user(info.cause, &infop->si_code, Efault);
unsafe_put_user(info.pid, &infop->si_pid, Efault);
unsafe_put_user(info.uid, &infop->si_uid, Efault);
unsafe_put_user(info.status, &infop->si_status, Efault);
user_access_end();
return err;
Efault:
user_access_end();
return -EFAULT;
}

into this atrocious crap (pipe through grep -v ffffffffffffe0eb if you want
the src lines to go away):

$ objdump -Sdr randconfig-build/kernel/exit.o | awk "/>:\$/ { P=0; } /__do_sys_waitid>:\$/ { P=1; O=strtonum(\"0x\" \$1); } { if (P) { o=strtonum(\"0x\" \$1); printf(\"%04x \", o-O); print \$0; } }"

0000 0000000000001f15 <__do_sys_waitid>:
ffffffffffffe0eb
ffffffffffffe0eb SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
ffffffffffffe0eb infop, int, options, struct rusage __user *, ru)
ffffffffffffe0eb {
0000 1f15: 55 push %rbp
ffffffffffffe0eb struct rusage r;
ffffffffffffe0eb struct waitid_info info = {.status = 0};
ffffffffffffe0eb long err = kernel_waitid(which, upid, &info, options, ru ? &r : NULL);
0001 1f16: b8 00 00 00 00 mov $0x0,%eax
ffffffffffffe0eb {
0006 1f1b: 48 89 e5 mov %rsp,%rbp
0009 1f1e: 41 57 push %r15
000b 1f20: 41 56 push %r14
000d 1f22: 41 55 push %r13
000f 1f24: 41 54 push %r12
0011 1f26: 4d 89 c4 mov %r8,%r12
0014 1f29: 53 push %rbx
0015 1f2a: 48 89 d3 mov %rdx,%rbx
0018 1f2d: 48 83 e4 f0 and $0xfffffffffffffff0,%rsp
001c 1f31: 48 81 ec a0 00 00 00 sub $0xa0,%rsp
ffffffffffffe0eb long err = kernel_waitid(which, upid, &info, options, ru ? &r : NULL);
0023 1f38: 4d 85 e4 test %r12,%r12
0026 1f3b: 4c 8d 44 24 10 lea 0x10(%rsp),%r8
002b 1f40: 48 89 e2 mov %rsp,%rdx
ffffffffffffe0eb struct waitid_info info = {.status = 0};
002e 1f43: 48 c7 04 24 00 00 00 movq $0x0,(%rsp)
0035 1f4a: 00
0036 1f4b: 48 c7 44 24 08 00 00 movq $0x0,0x8(%rsp)
003d 1f52: 00 00
ffffffffffffe0eb long err = kernel_waitid(which, upid, &info, options, ru ? &r : NULL);
003f 1f54: 4c 0f 44 c0 cmove %rax,%r8
0043 1f58: e8 9c fe ff ff callq 1df9 <kernel_waitid>
ffffffffffffe0eb int signo = 0;
ffffffffffffe0eb
ffffffffffffe0eb if (err > 0) {
0048 1f5d: 48 85 c0 test %rax,%rax
ffffffffffffe0eb long err = kernel_waitid(which, upid, &info, options, ru ? &r : NULL);
004b 1f60: 49 89 c7 mov %rax,%r15
ffffffffffffe0eb if (err > 0) {
004e 1f63: 0f 9f c0 setg %al
0051 1f66: 0f b6 c0 movzbl %al,%eax
0054 1f69: 48 83 c0 02 add $0x2,%rax
0058 1f6d: 48 ff 04 c5 00 00 00 incq 0x0(,%rax,8)
005f 1f74: 00
005c 1f71: R_X86_64_32S _ftrace_branch+0x1c0
0060 1f75: 4d 85 ff test %r15,%r15
0063 1f78: 7e 7b jle 1ff5 <__do_sys_waitid+0xe0>
ffffffffffffe0eb signo = SIGCHLD;
ffffffffffffe0f9 err = 0;
ffffffffffffe0eb if (ru && copy_to_user(ru, &r, sizeof(struct rusage)))
0065 1f7a: 31 d2 xor %edx,%edx
0067 1f7c: 4d 85 e4 test %r12,%r12
006a 1f7f: 74 53 je 1fd4 <__do_sys_waitid+0xbf>
ffffffffffffe0eb
ffffffffffffe0eb static __always_inline bool
ffffffffffffe0f7 check_copy_size(const void *addr, size_t bytes, bool is_source)
ffffffffffffe0eb {
ffffffffffffe0eb int sz = __compiletime_object_size(addr);
ffffffffffffe0eb if (unlikely(sz >= 0 && sz < bytes)) {
006c 1f81: 31 f6 xor %esi,%esi
006e 1f83: b9 01 00 00 00 mov $0x1,%ecx
0073 1f88: 48 c7 c7 00 00 00 00 mov $0x0,%rdi
0076 1f8b: R_X86_64_32S _ftrace_annotated_branch+0xcc0
007a 1f8f: e8 00 00 00 00 callq 1f94 <__do_sys_waitid+0x7f>
007b 1f90: R_X86_64_PLT32 ftrace_likely_update-0x4
ffffffffffffe0eb }
ffffffffffffe0eb
ffffffffffffe0eb static __always_inline unsigned long __must_check
ffffffffffffe0f7 copy_to_user(void __user *to, const void *from, unsigned long n)
ffffffffffffe0eb {
ffffffffffffe0eb if (likely(check_copy_size(from, n, true)))
007f 1f94: 31 c9 xor %ecx,%ecx
0081 1f96: ba 01 00 00 00 mov $0x1,%edx
0086 1f9b: be 01 00 00 00 mov $0x1,%esi
008b 1fa0: 48 c7 c7 00 00 00 00 mov $0x0,%rdi
008e 1fa3: R_X86_64_32S _ftrace_annotated_branch+0xae0
0092 1fa7: 48 ff 05 00 00 00 00 incq 0x0(%rip) # 1fae <__do_sys_waitid+0x99>
0095 1faa: R_X86_64_PC32 _ftrace_branch+0x1fcc
0099 1fae: e8 00 00 00 00 callq 1fb3 <__do_sys_waitid+0x9e>
009a 1faf: R_X86_64_PLT32 ftrace_likely_update-0x4
ffffffffffffe0eb n = _copy_to_user(to, from, n);
009e 1fb3: ba 90 00 00 00 mov $0x90,%edx
00a3 1fb8: 4c 89 e7 mov %r12,%rdi
ffffffffffffe0eb if (likely(check_copy_size(from, n, true)))
00a6 1fbb: 48 ff 05 00 00 00 00 incq 0x0(%rip) # 1fc2 <__do_sys_waitid+0xad>
00a9 1fbe: R_X86_64_PC32 _ftrace_branch+0x1d7c
ffffffffffffe0eb n = _copy_to_user(to, from, n);
00ad 1fc2: 48 8d 74 24 10 lea 0x10(%rsp),%rsi
00b2 1fc7: e8 00 00 00 00 callq 1fcc <__do_sys_waitid+0xb7>
00b3 1fc8: R_X86_64_PLT32 _copy_to_user-0x4
00b7 1fcc: 31 d2 xor %edx,%edx
00b9 1fce: 48 85 c0 test %rax,%rax
00bc 1fd1: 0f 95 c2 setne %dl
00bf 1fd4: 48 63 c2 movslq %edx,%rax
ffffffffffffe0eb signo = SIGCHLD;
00c2 1fd7: 41 be 11 00 00 00 mov $0x11,%r14d
ffffffffffffe0f9 err = 0;
00c8 1fdd: 45 31 ff xor %r15d,%r15d
ffffffffffffe0eb if (ru && copy_to_user(ru, &r, sizeof(struct rusage)))
00cb 1fe0: 48 83 c0 02 add $0x2,%rax
00cf 1fe4: 48 ff 04 c5 00 00 00 incq 0x0(,%rax,8)
00d6 1feb: 00
00d3 1fe8: R_X86_64_32S _ftrace_branch+0x198
00d7 1fec: 85 d2 test %edx,%edx
00d9 1fee: 74 08 je 1ff8 <__do_sys_waitid+0xe3>
00db 1ff0: e9 33 01 00 00 jmpq 2128 <__do_sys_waitid+0x213>
ffffffffffffe0eb int signo = 0;
00e0 1ff5: 45 31 f6 xor %r14d,%r14d
ffffffffffffe0eb return -EFAULT;
ffffffffffffe0eb }
ffffffffffffe0eb if (!infop)
00e3 1ff8: 31 c0 xor %eax,%eax
00e5 1ffa: 48 85 db test %rbx,%rbx
00e8 1ffd: 0f 94 c0 sete %al
00eb 2000: 48 83 c0 02 add $0x2,%rax
00ef 2004: 48 ff 04 c5 00 00 00 incq 0x0(,%rax,8)
00f6 200b: 00
00f3 2008: R_X86_64_32S _ftrace_branch+0x170
00f7 200c: 48 85 db test %rbx,%rbx
00fa 200f: 0f 84 1a 01 00 00 je 212f <__do_sys_waitid+0x21a>
ffffffffffffe0eb return raw_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED;
0100 2015: 65 44 8b 25 00 00 00 mov %gs:0x0(%rip),%r12d # 201d <__do_sys_waitid+0x108>
0107 201c: 00
0104 2019: R_X86_64_PC32 __preempt_count-0x4
ffffffffffffe0eb * checking before using them, but you have to surround them with the
ffffffffffffe0eb * user_access_begin/end() pair.
ffffffffffffe0eb */
ffffffffffffe0eb static __must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len)
ffffffffffffe0eb {
ffffffffffffe0eb if (unlikely(!access_ok(ptr,len)))
0108 201d: 45 31 ed xor %r13d,%r13d
010b 2020: 41 81 e4 00 01 1f 00 and $0x1f0100,%r12d
0112 2027: 48 c7 c7 00 00 00 00 mov $0x0,%rdi
0115 202a: R_X86_64_32S _ftrace_annotated_branch+0xb70
0119 202e: 41 0f 95 c5 setne %r13b
011d 2032: 31 c9 xor %ecx,%ecx
011f 2034: 31 d2 xor %edx,%edx
0121 2036: 44 89 ee mov %r13d,%esi
0124 2039: e8 00 00 00 00 callq 203e <__do_sys_waitid+0x129>
0125 203a: R_X86_64_PLT32 ftrace_likely_update-0x4
0129 203e: 49 63 c5 movslq %r13d,%rax
012c 2041: 48 83 c0 02 add $0x2,%rax
0130 2045: 48 ff 04 c5 00 00 00 incq 0x0(,%rax,8)
0137 204c: 00
0134 2049: R_X86_64_32S _ftrace_branch+0x1d90
0138 204d: 45 85 e4 test %r12d,%r12d
013b 2050: 74 02 je 2054 <__do_sys_waitid+0x13f>
013d 2052: 0f 0b ud2
013f 2054: 31 c9 xor %ecx,%ecx
0141 2056: 31 d2 xor %edx,%edx
0143 2058: 44 89 ee mov %r13d,%esi
0146 205b: 48 c7 c7 00 00 00 00 mov $0x0,%rdi
0149 205e: R_X86_64_32S _ftrace_annotated_branch+0xb40
014d 2062: e8 00 00 00 00 callq 2067 <__do_sys_waitid+0x152>
014e 2063: R_X86_64_PLT32 ftrace_likely_update-0x4
ffffffffffffe0eb return unlikely(addr > limit - size);
0152 2067: 45 31 e4 xor %r12d,%r12d
0155 206a: 48 c7 c7 00 00 00 00 mov $0x0,%rdi
0158 206d: R_X86_64_32S _ftrace_annotated_branch+0xbd0
015c 2071: 65 48 8b 04 25 00 00 mov %gs:0x0,%rax
0163 2078: 00 00
0161 2076: R_X86_64_32S current_task
0165 207a: 4c 8b a8 d8 1d 00 00 mov 0x1dd8(%rax),%r13
016c 2081: 49 83 c5 80 add $0xffffffffffffff80,%r13
0170 2085: 4c 39 eb cmp %r13,%rbx
0173 2088: 41 0f 97 c4 seta %r12b
0177 208c: 31 c9 xor %ecx,%ecx
0179 208e: 31 d2 xor %edx,%edx
017b 2090: 44 89 e6 mov %r12d,%esi
017e 2093: e8 00 00 00 00 callq 2098 <__do_sys_waitid+0x183>
017f 2094: R_X86_64_PLT32 ftrace_likely_update-0x4
ffffffffffffe0eb if (unlikely(!access_ok(ptr,len)))
0183 2098: 31 f6 xor %esi,%esi
0185 209a: 4c 39 eb cmp %r13,%rbx
0188 209d: ba 01 00 00 00 mov $0x1,%edx
018d 20a2: 40 0f 96 c6 setbe %sil
0191 20a6: 48 c7 c7 00 00 00 00 mov $0x0,%rdi
0194 20a9: R_X86_64_32S _ftrace_annotated_branch+0xb10
0198 20ad: 31 c9 xor %ecx,%ecx
019a 20af: e8 00 00 00 00 callq 20b4 <__do_sys_waitid+0x19f>
019b 20b0: R_X86_64_PLT32 ftrace_likely_update-0x4
019f 20b4: 44 89 e6 mov %r12d,%esi
01a2 20b7: 31 c9 xor %ecx,%ecx
01a4 20b9: 31 d2 xor %edx,%edx
01a6 20bb: 48 c7 c7 00 00 00 00 mov $0x0,%rdi
01a9 20be: R_X86_64_32S _ftrace_annotated_branch+0xba0
01ad 20c2: 49 83 c4 02 add $0x2,%r12
01b1 20c6: e8 00 00 00 00 callq 20cb <__do_sys_waitid+0x1b6>
01b2 20c7: R_X86_64_PLT32 ftrace_likely_update-0x4
01b6 20cb: 4a ff 04 e5 00 00 00 incq 0x0(,%r12,8)
01bd 20d2: 00
01ba 20cf: R_X86_64_32S _ftrace_branch+0x1db8
ffffffffffffe0eb return 0;
01be 20d3: 31 c0 xor %eax,%eax
ffffffffffffe0eb if (unlikely(!access_ok(ptr,len)))
01c0 20d5: 4c 39 eb cmp %r13,%rbx
01c3 20d8: 77 08 ja 20e2 <__do_sys_waitid+0x1cd>
ffffffffffffe0eb __uaccess_begin_nospec();
01c5 20da: 90 nop
01c6 20db: 90 nop
01c7 20dc: 90 nop
ffffffffffffe0eb }
ffffffffffffe0eb
ffffffffffffe0eb static __always_inline void stac(void)
ffffffffffffe0eb {
ffffffffffffe0eb /* Note: a barrier is implicit in alternative() */
ffffffffffffe0f5 alternative("", __stringify(__ASM_STAC), X86_FEATURE_SMAP);
01c8 20dd: 90 nop
01c9 20de: 90 nop
01ca 20df: 90 nop
ffffffffffffe0eb return 1;
01cb 20e0: b0 01 mov $0x1,%al
ffffffffffffe0eb return err;
ffffffffffffe0eb
ffffffffffffe0eb if (!user_access_begin(infop, sizeof(*infop)))
01cd 20e2: 83 f0 01 xor $0x1,%eax
01d0 20e5: 48 89 c2 mov %rax,%rdx
01d3 20e8: 83 e2 01 and $0x1,%edx
01d6 20eb: 48 83 c2 02 add $0x2,%rdx
01da 20ef: 48 ff 04 d5 00 00 00 incq 0x0(,%rdx,8)
01e1 20f6: 00
01de 20f3: R_X86_64_32S _ftrace_branch+0x148
01e2 20f7: 84 c0 test %al,%al
01e4 20f9: 75 2d jne 2128 <__do_sys_waitid+0x213>
ffffffffffffe0eb return -EFAULT;
ffffffffffffe0eb
ffffffffffffe0eb unsafe_put_user(signo, &infop->si_signo, Efault);
01e6 20fb: 44 89 33 mov %r14d,(%rbx)
ffffffffffffe0eb unsafe_put_user(0, &infop->si_errno, Efault);
01e9 20fe: c7 43 04 00 00 00 00 movl $0x0,0x4(%rbx)
ffffffffffffe0eb unsafe_put_user(info.cause, &infop->si_code, Efault);
01f0 2105: 8b 44 24 0c mov 0xc(%rsp),%eax
01f4 2109: 89 43 08 mov %eax,0x8(%rbx)
ffffffffffffe0eb unsafe_put_user(info.pid, &infop->si_pid, Efault);
01f7 210c: 8b 04 24 mov (%rsp),%eax
01fa 210f: 89 43 10 mov %eax,0x10(%rbx)
ffffffffffffe0eb unsafe_put_user(info.uid, &infop->si_uid, Efault);
01fd 2112: 8b 44 24 04 mov 0x4(%rsp),%eax
0201 2116: 89 43 14 mov %eax,0x14(%rbx)
ffffffffffffe0eb unsafe_put_user(info.status, &infop->si_status, Efault);
0204 2119: 8b 44 24 08 mov 0x8(%rsp),%eax
0208 211d: 89 43 18 mov %eax,0x18(%rbx)
ffffffffffffe0eb user_access_end();
020b 2120: 90 nop
020c 2121: 90 nop
020d 2122: 90 nop
ffffffffffffe0eb return err;
020e 2123: eb 0a jmp 212f <__do_sys_waitid+0x21a>
ffffffffffffefe5 Efault:
ffffffffffffe0eb user_access_end();
0210 2125: 90 nop
0211 2126: 90 nop
0212 2127: 90 nop
ffffffffffffe0eb return -EFAULT;
0213 2128: 49 c7 c7 f2 ff ff ff mov $0xfffffffffffffff2,%r15
ffffffffffffe0eb }
021a 212f: 48 8d 65 d8 lea -0x28(%rbp),%rsp
021e 2133: 4c 89 f8 mov %r15,%rax
0221 2136: 5b pop %rbx
0222 2137: 41 5c pop %r12
0224 2139: 41 5d pop %r13
0226 213b: 41 5e pop %r14
0228 213d: 41 5f pop %r15
022a 213f: 5d pop %rbp
022b 2140: c3 retq
ffffffffffffe0eb

And then complains about:

$ tools/objtool/objtool check --no-fp --uaccess --backtrace randconfig-build/kernel/exit.o
randconfig-build/kernel/exit.o: warning: objtool: .altinstr_replacement+0xc: redundant UACCESS disable
randconfig-build/kernel/exit.o: warning: objtool: __do_sys_waitid()+0x210: (alt)
randconfig-build/kernel/exit.o: warning: objtool: __do_sys_waitid()+0x1e6: (alt)
randconfig-build/kernel/exit.o: warning: objtool: __do_sys_waitid()+0x1c3: (branch)
randconfig-build/kernel/exit.o: warning: objtool: __do_sys_waitid()+0x13b: (branch)
randconfig-build/kernel/exit.o: warning: objtool: __do_sys_waitid()+0x63: (branch)
randconfig-build/kernel/exit.o: warning: objtool: __do_sys_waitid()+0x0: <=== (func)

Which, afaict is correct given the asm, but is absolute nonsense given
the original C. If you follow that code path, it appears to do the
memops without STAC, and then complains it does CLAC. Which is of course
complete crap.

Maybe I've been staring at this too long and am (again) missing the
obvious :/


Attachments:
randconfig-fail (93.70 kB)

2019-03-07 12:56:37

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 00/20] objtool: UACCESS validation v3

On Thu, Mar 07, 2019 at 01:03:17PM +0100, Peter Zijlstra wrote:


> 01c3 20d8: 77 08 ja 20e2 <__do_sys_waitid+0x1cd>

taken:

randconfig-build/kernel/exit.o: warning: objtool: __do_sys_waitid()+0x1c3: (branch)

> ffffffffffffe0f5 alternative("", __stringify(__ASM_STAC), X86_FEATURE_SMAP);
> 01c8 20dd: 90 nop
> 01c9 20de: 90 nop
> 01ca 20df: 90 nop

Bye-bye STAC, jumped straight over.

> 01cd 20e2: 83 f0 01 xor $0x1,%eax
> 01d0 20e5: 48 89 c2 mov %rax,%rdx
> 01d3 20e8: 83 e2 01 and $0x1,%edx
> 01d6 20eb: 48 83 c2 02 add $0x2,%rdx
> 01da 20ef: 48 ff 04 d5 00 00 00 incq 0x0(,%rdx,8)
> 01e1 20f6: 00
> 01de 20f3: R_X86_64_32S _ftrace_branch+0x148
> 01e2 20f7: 84 c0 test %al,%al
> 01e4 20f9: 75 2d jne 2128 <__do_sys_waitid+0x213>

we do not take this branch and fall-through.

> ffffffffffffe0eb return -EFAULT;
> ffffffffffffe0eb
> ffffffffffffe0eb unsafe_put_user(signo, &infop->si_signo, Efault);
> 01e6 20fb: 44 89 33 mov %r14d,(%rbx)

fault, take exception:

randconfig-build/kernel/exit.o: warning: objtool: __do_sys_waitid()+0x1e6: (alt)

Relocation section '.rela__ex_table' at offset 0x21568 contains 18 entries:
Offset Info Type Sym. Value Sym. Name + Addend
000000000000 000200000002 R_X86_64_PC32 0000000000000000 .text + 20fb
000000000004 000200000002 R_X86_64_PC32 0000000000000000 .text + 2125

> ffffffffffffefe5 Efault:
> ffffffffffffe0eb user_access_end();
> 0210 2125: 90 nop
> 0211 2126: 90 nop
> 0212 2127: 90 nop

randconfig-build/kernel/exit.o: warning: objtool: __do_sys_waitid()+0x210: (alt)

000000000034 000200000002 R_X86_64_PC32 0000000000000000 .text + 2125
000000000038 00c800000002 R_X86_64_PC32 0000000000000000 .altinstr_replacement + c

0000000000000000 <.altinstr_replacement>:
c: 0f 01 ca clac

randconfig-build/kernel/exit.o: warning: objtool: .altinstr_replacement+0xc: redundant UACCESS disable


2019-03-07 13:15:37

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 00/20] objtool: UACCESS validation v3

On Thu, Mar 07, 2019 at 01:55:26PM +0100, Peter Zijlstra wrote:
> On Thu, Mar 07, 2019 at 01:03:17PM +0100, Peter Zijlstra wrote:


> > 01be 20d3: 31 c0 xor %eax,%eax
> > 01c0 20d5: 4c 39 eb cmp %r13,%rbx
> > 01c3 20d8: 77 08 ja 20e2 <__do_sys_waitid+0x1cd>

randconfig-build/kernel/exit.o: warning: objtool: __do_sys_waitid()+0x1c3: (branch)

> > 01cd 20e2: 83 f0 01 xor $0x1,%eax
> > 01d0 20e5: 48 89 c2 mov %rax,%rdx
> > 01d3 20e8: 83 e2 01 and $0x1,%edx
> > 01d6 20eb: 48 83 c2 02 add $0x2,%rdx
> > 01da 20ef: 48 ff 04 d5 00 00 00 incq 0x0(,%rdx,8)
> > 01e1 20f6: 00
> > 01de 20f3: R_X86_64_32S _ftrace_branch+0x148
> > 01e2 20f7: 84 c0 test %al,%al
> > 01e4 20f9: 75 2d jne 2128 <__do_sys_waitid+0x213>
>
> we do not take this branch and fall-through.

And that is the error, I think. We should've taken it and went to:

return -EFAULT;

because:

+1be xor %eax,%eax eax=0
+1cd xor $0x1,%eax eax=1
+1e2 test %al,%al 1&1==1 -> ZF=0
+1e4 jnz

Is an unconditional code sequence, but there's no way objtool can do
that without becoming a full blown interpreter :/

> > 0213 2128: 49 c7 c7 f2 ff ff ff mov $0xfffffffffffffff2,%r15
> > ffffffffffffe0eb }
> > 021a 212f: 48 8d 65 d8 lea -0x28(%rbp),%rsp
> > 021e 2133: 4c 89 f8 mov %r15,%rax
> > 0221 2136: 5b pop %rbx
> > 0222 2137: 41 5c pop %r12
> > 0224 2139: 41 5d pop %r13
> > 0226 213b: 41 5e pop %r14
> > 0228 213d: 41 5f pop %r15
> > 022a 213f: 5d pop %rbp
> > 022b 2140: c3 retq

2019-03-07 16:32:13

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCH 00/20] objtool: UACCESS validation v3

On Thu, Mar 7, 2019 at 4:03 AM Peter Zijlstra <[email protected]> wrote:
>
> Take for instance this one (.config attached); it has both
> CONFIG_PROFILE_ALL_BRANCHES=y and CONFIG_TRACE_BRANCH_PROFILING=y
> and it compiles:

How about just turning off SMAP checking for the really odd cases?

At some point it's not worth worrying about excessive debug
infrastructure, I think. Just give up and say "ok. we'll save/restore
in preempt_schedule()".

Or just make preempt_schedule() check AC and not schedule. It already
does that for IF.
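
A rough sketch of that idea (purely illustrative, not a patch from this
thread; the helpers named below are the existing kernel ones, the AC check
itself is the hypothetical part):

asmlinkage __visible void __sched notrace preempt_schedule(void)
{
	/* existing behaviour: no preemption with preempt_count != 0 or IRQs off */
	if (likely(!preemptible()))
		return;

	/* suggested addition: no preemption while a STAC/UACCESS region is open */
	if (unlikely(native_save_fl() & X86_EFLAGS_AC))
		return;

	preempt_schedule_common();
}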

Linus

2019-03-07 16:48:28

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH 00/20] objtool: UACCESS validation v3

On Thu, Mar 07, 2019 at 02:13:12PM +0100, Peter Zijlstra wrote:
> On Thu, Mar 07, 2019 at 01:55:26PM +0100, Peter Zijlstra wrote:
> > On Thu, Mar 07, 2019 at 01:03:17PM +0100, Peter Zijlstra wrote:
>
>
> > > 01be 20d3: 31 c0 xor %eax,%eax
> > > 01c0 20d5: 4c 39 eb cmp %r13,%rbx
> > > 01c3 20d8: 77 08 ja 20e2 <__do_sys_waitid+0x1cd>
>
> randconfig-build/kernel/exit.o: warning: objtool: __do_sys_waitid()+0x1c3: (branch)
>
> > > 01cd 20e2: 83 f0 01 xor $0x1,%eax
> > > 01d0 20e5: 48 89 c2 mov %rax,%rdx
> > > 01d3 20e8: 83 e2 01 and $0x1,%edx
> > > 01d6 20eb: 48 83 c2 02 add $0x2,%rdx
> > > 01da 20ef: 48 ff 04 d5 00 00 00 incq 0x0(,%rdx,8)
> > > 01e1 20f6: 00
> > > 01de 20f3: R_X86_64_32S _ftrace_branch+0x148
> > > 01e2 20f7: 84 c0 test %al,%al
> > > 01e4 20f9: 75 2d jne 2128 <__do_sys_waitid+0x213>
> >
> > we do not take this branch and fall-through.
>
> And that is the error, I think. We should've taken it and went to:
>
> return -EFAULT;
>
> because:
>
> +1be xor %eax,%eax eax=0
> +1cd xor $0x1,%eax eax=1
> +1e2 test %al,%al 1&1==1 -> ZF=0
> +1e4 jnz
>
> Is an unconditional code sequence, but there's no way objtool can do
> that without becoming a full blown interpreter :/
>
> > > 0213 2128: 49 c7 c7 f2 ff ff ff mov $0xfffffffffffffff2,%r15
> > > ffffffffffffe0eb }
> > > 021a 212f: 48 8d 65 d8 lea -0x28(%rbp),%rsp
> > > 021e 2133: 4c 89 f8 mov %r15,%rax
> > > 0221 2136: 5b pop %rbx
> > > 0222 2137: 41 5c pop %r12
> > > 0224 2139: 41 5d pop %r13
> > > 0226 213b: 41 5e pop %r14
> > > 0228 213d: 41 5f pop %r15
> > > 022a 213f: 5d pop %rbp
> > > 022b 2140: c3 retq

This "fixes" it, and also seems to help -Os make much code:

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 445348facea9..8de63db58fdd 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -67,7 +67,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
.line = __LINE__, \
}; \
______r = !!(cond); \
- ______f.miss_hit[______r]++; \
+ if (______r) ______f.miss_hit[1]++; else ______f.miss_hit[0]++; \
______r; \
}))
#endif /* CONFIG_PROFILE_ALL_BRANCHES */

2019-03-07 16:51:01

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH 00/20] objtool: UACCESS validation v3

On Thu, Mar 07, 2019 at 10:47:05AM -0600, Josh Poimboeuf wrote:
> On Thu, Mar 07, 2019 at 02:13:12PM +0100, Peter Zijlstra wrote:
> > On Thu, Mar 07, 2019 at 01:55:26PM +0100, Peter Zijlstra wrote:
> > > On Thu, Mar 07, 2019 at 01:03:17PM +0100, Peter Zijlstra wrote:
> >
> >
> > > > 01be 20d3: 31 c0 xor %eax,%eax
> > > > 01c0 20d5: 4c 39 eb cmp %r13,%rbx
> > > > 01c3 20d8: 77 08 ja 20e2 <__do_sys_waitid+0x1cd>
> >
> > randconfig-build/kernel/exit.o: warning: objtool: __do_sys_waitid()+0x1c3: (branch)
> >
> > > > 01cd 20e2: 83 f0 01 xor $0x1,%eax
> > > > 01d0 20e5: 48 89 c2 mov %rax,%rdx
> > > > 01d3 20e8: 83 e2 01 and $0x1,%edx
> > > > 01d6 20eb: 48 83 c2 02 add $0x2,%rdx
> > > > 01da 20ef: 48 ff 04 d5 00 00 00 incq 0x0(,%rdx,8)
> > > > 01e1 20f6: 00
> > > > 01de 20f3: R_X86_64_32S _ftrace_branch+0x148
> > > > 01e2 20f7: 84 c0 test %al,%al
> > > > 01e4 20f9: 75 2d jne 2128 <__do_sys_waitid+0x213>
> > >
> > > we do not take this branch and fall-through.
> >
> > And that is the error, I think. We should've taken it and went to:
> >
> > return -EFAULT;
> >
> > because:
> >
> > +1be xor %eax,%eax eax=0
> > +1cd xor $0x1,%eax eax=1
> > +1e2 test %al,%al 1&1==1 -> ZF=0
> > +1e4 jnz
> >
> > Is an unconditional code sequence, but there's no way objtool can do
> > that without becoming a full blown interpreter :/
> >
> > > > 0213 2128: 49 c7 c7 f2 ff ff ff mov $0xfffffffffffffff2,%r15
> > > > ffffffffffffe0eb }
> > > > 021a 212f: 48 8d 65 d8 lea -0x28(%rbp),%rsp
> > > > 021e 2133: 4c 89 f8 mov %r15,%rax
> > > > 0221 2136: 5b pop %rbx
> > > > 0222 2137: 41 5c pop %r12
> > > > 0224 2139: 41 5d pop %r13
> > > > 0226 213b: 41 5e pop %r14
> > > > 0228 213d: 41 5f pop %r15
> > > > 022a 213f: 5d pop %rbp
> > > > 022b 2140: c3 retq
>
> This "fixes" it, and also seems to help -Os make much code:

much *smaller* code

>
> diff --git a/include/linux/compiler.h b/include/linux/compiler.h
> index 445348facea9..8de63db58fdd 100644
> --- a/include/linux/compiler.h
> +++ b/include/linux/compiler.h
> @@ -67,7 +67,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
> .line = __LINE__, \
> }; \
> ______r = !!(cond); \
> - ______f.miss_hit[______r]++; \
> + if (______r) ______f.miss_hit[1]++; else ______f.miss_hit[0]++; \
> ______r; \
> }))
> #endif /* CONFIG_PROFILE_ALL_BRANCHES */

--
Josh

2019-03-07 17:03:06

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCH 00/20] objtool: UACCESS validation v3

On Thu, Mar 7, 2019 at 8:47 AM Josh Poimboeuf <[email protected]> wrote:
>
> This "fixes" it, and also seems to help -Os make much code:

Yeah, considering that this __trace_if() macro from hell is doing an
'if()' on the result of that inner thing, it makes sense to *not* use
that "looks simpler and shorter" array sequence, but depend on the
compiler then noticing that the conditionals are the same and joining
the two branches together.

The compiler has to do much more work to either generate the actual
dynamic array thing, or notice that the _index_ of the array matches
the _conditional_ on the branch, and undo that thing.
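
For reference, the two expansions being compared here, both lifted from
Josh's diff:

	/* array-index form (current): ______r has to be materialized as a 0/1
	 * integer to index miss_hit[], hence the xor/setcc/incq sequences above */
	______f.miss_hit[______r]++;

	/* conditional form (Josh's change): the branch stays a branch, which the
	 * compiler can merge with the outer if () that __trace_if() feeds */
	if (______r) ______f.miss_hit[1]++; else ______f.miss_hit[0]++;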

But that macro really is the macro from hell regardless.

Do people really use CONFIG_PROFILE_ALL_BRANCHES?

Linus

2019-03-07 17:13:33

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [PATCH 00/20] objtool: UACCESS validation v3

On March 7, 2019 8:47:05 AM PST, Josh Poimboeuf <[email protected]> wrote:
>On Thu, Mar 07, 2019 at 02:13:12PM +0100, Peter Zijlstra wrote:
>> On Thu, Mar 07, 2019 at 01:55:26PM +0100, Peter Zijlstra wrote:
>> > On Thu, Mar 07, 2019 at 01:03:17PM +0100, Peter Zijlstra wrote:
>>
>>
>> > > 01be 20d3: 31 c0 xor %eax,%eax
>> > > 01c0 20d5: 4c 39 eb cmp %r13,%rbx
>> > > 01c3 20d8: 77 08 ja 20e2
><__do_sys_waitid+0x1cd>
>>
>> randconfig-build/kernel/exit.o: warning: objtool:
>__do_sys_waitid()+0x1c3: (branch)
>>
>> > > 01cd 20e2: 83 f0 01 xor $0x1,%eax
>> > > 01d0 20e5: 48 89 c2 mov %rax,%rdx
>> > > 01d3 20e8: 83 e2 01 and $0x1,%edx
>> > > 01d6 20eb: 48 83 c2 02 add $0x2,%rdx
>> > > 01da 20ef: 48 ff 04 d5 00 00 00 incq 0x0(,%rdx,8)
>> > > 01e1 20f6: 00
>> > > 01de 20f3: R_X86_64_32S _ftrace_branch+0x148
>> > > 01e2 20f7: 84 c0 test %al,%al
>> > > 01e4 20f9: 75 2d jne 2128
><__do_sys_waitid+0x213>
>> >
>> > we do not take this branch and fall-through.
>>
>> And that is the error, I think. We should've taken it and went to:
>>
>> return -EFAULT;
>>
>> because:
>>
>> +1be xor %eax,%eax eax=0
>> +1cd xor $0x1,%eax eax=1
>> +1e2 test %al,%al 1&1==1 -> ZF=0
>> +1e4 jnz
>>
>> Is an unconditional code sequence, but there's no way objtool can do
>> that without becoming a full blown interpreter :/
>>
>> > > 0213 2128: 49 c7 c7 f2 ff ff ff mov
>$0xfffffffffffffff2,%r15
>> > > ffffffffffffe0eb }
>> > > 021a 212f: 48 8d 65 d8 lea -0x28(%rbp),%rsp
>> > > 021e 2133: 4c 89 f8 mov %r15,%rax
>> > > 0221 2136: 5b pop %rbx
>> > > 0222 2137: 41 5c pop %r12
>> > > 0224 2139: 41 5d pop %r13
>> > > 0226 213b: 41 5e pop %r14
>> > > 0228 213d: 41 5f pop %r15
>> > > 022a 213f: 5d pop %rbp
>> > > 022b 2140: c3 retq
>
>This "fixes" it, and also seems to help -Os make much code:
>
>diff --git a/include/linux/compiler.h b/include/linux/compiler.h
>index 445348facea9..8de63db58fdd 100644
>--- a/include/linux/compiler.h
>+++ b/include/linux/compiler.h
>@@ -67,7 +67,7 @@ void ftrace_likely_update(struct ftrace_likely_data
>*f, int val,
> .line = __LINE__, \
> }; \
> ______r = !!(cond); \
>- ______f.miss_hit[______r]++; \
>+ if (______r) ______f.miss_hit[1]++; else ______f.miss_hit[0]++; \
> ______r; \
> }))
> #endif /* CONFIG_PROFILE_ALL_BRANCHES */

if (cond)? Or is ___r used elsewhere?
--
Sent from my Android device with K-9 Mail. Please excuse my brevity.

2019-03-07 17:17:57

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [PATCH 00/20] objtool: UACCESS validation v3

On March 7, 2019 3:45:11 AM PST, Peter Zijlstra <[email protected]> wrote:
>Teach objtool to validate the UACCESS (SMAP, PAN) rules which are
>currently
>unenforced and (therefore obviously) violated.
>
>UACCESS sections should be small; we want to limit the amount of code
>that can
>touch userspace. Furthermore, UACCESS state isn't scheduled, this means
>that
>anything that directly calls into the scheduler will result in random
>code
>running with UACCESS enabled and possibly getting back into the UACCESS
>region
>with UACCESS disabled and causing faults.
>
>Forbid any CALL/RET while UACCESS is enabled; but provide a few
>exceptions.
>
>This builds x86_64-allmodconfig clean, and I've only got a few
>randconfig
>failures left (GCC-8) that I'm not quite understanding.
>
>---
> arch/x86/ia32/ia32_signal.c | 29 ++-
> arch/x86/include/asm/asm.h | 24 --
> arch/x86/include/asm/nospec-branch.h | 4 +-
> arch/x86/include/asm/smap.h | 20 ++
> arch/x86/include/asm/uaccess.h | 5 +-
> arch/x86/include/asm/uaccess_64.h | 3 -
> arch/x86/include/asm/xen/hypercall.h | 26 +-
> arch/x86/kernel/signal.c | 2 +-
> arch/x86/lib/copy_user_64.S | 48 ++++
> arch/x86/lib/memcpy_64.S | 3 +-
> arch/x86/lib/usercopy_64.c | 20 --
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 6 +-
> include/linux/uaccess.h | 2 +
> kernel/trace/trace_branch.c | 4 +
> lib/Makefile | 1 +
> lib/ubsan.c | 4 +
> mm/kasan/Makefile | 3 +
> mm/kasan/common.c | 10 +
> mm/kasan/report.c | 3 +-
> scripts/Makefile.build | 3 +
> tools/objtool/Makefile | 2 +-
> tools/objtool/arch.h | 8 +-
> tools/objtool/arch/x86/decode.c | 26 +-
> tools/objtool/builtin-check.c | 4 +-
> tools/objtool/builtin.h | 2 +-
>tools/objtool/check.c | 382
>++++++++++++++++++++++-------
> tools/objtool/check.h | 4 +-
> tools/objtool/elf.c | 15 +-
> tools/objtool/elf.h | 3 +-
> tools/objtool/special.c | 10 +-
> tools/objtool/warn.h | 8 +
> 31 files changed, 511 insertions(+), 173 deletions(-)
>
>
>

This is phenomenal. Thank you so much for digging into this. I'm hoping this will greatly reduce the risk of future leakage.
--
Sent from my Android device with K-9 Mail. Please excuse my brevity.

2019-03-07 17:18:30

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH 00/20] objtool: UACCESS validation v3

On Thu, Mar 07, 2019 at 09:00:49AM -0800, Linus Torvalds wrote:
> On Thu, Mar 7, 2019 at 8:47 AM Josh Poimboeuf <[email protected]> wrote:
> >
> > This "fixes" it, and also seems to help -Os make much code:
>
> Yeah, considering that this __trace_if() macro from hell is doing an
> 'if()' on the result of that inner thing, it makes sense to *not* use
> that "looks simpler and shorter" array sequence, but depend on the
> compiler then noticing that the conditionals are the same and joining
> the two branches together.
>
> The compiler has to do much more work to either generate the actual
> dynamic array thing, or notice that the _index_ of the array matches
> the _conditional_ on the branch, and undo that thing.

Yeah, agreed. Now it doesn't have to do the funky xor thing to convert
the conditional to an int.

> But that macro really is the macro from hell regardless.
>
> Do people really use CONFIG_PROFILE_ALL_BRANCHES?

IIRC, Steven runs it once a year or so...

--
Josh

2019-03-07 17:19:15

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH 00/20] objtool: UACCESS validation v3

On Thu, Mar 07, 2019 at 09:04:36AM -0800, [email protected] wrote:
> On March 7, 2019 8:47:05 AM PST, Josh Poimboeuf <[email protected]> wrote:
> >On Thu, Mar 07, 2019 at 02:13:12PM +0100, Peter Zijlstra wrote:
> >> On Thu, Mar 07, 2019 at 01:55:26PM +0100, Peter Zijlstra wrote:
> >> > On Thu, Mar 07, 2019 at 01:03:17PM +0100, Peter Zijlstra wrote:
> >>
> >>
> >> > > 01be 20d3: 31 c0 xor %eax,%eax
> >> > > 01c0 20d5: 4c 39 eb cmp %r13,%rbx
> >> > > 01c3 20d8: 77 08 ja 20e2
> ><__do_sys_waitid+0x1cd>
> >>
> >> randconfig-build/kernel/exit.o: warning: objtool:
> >__do_sys_waitid()+0x1c3: (branch)
> >>
> >> > > 01cd 20e2: 83 f0 01 xor $0x1,%eax
> >> > > 01d0 20e5: 48 89 c2 mov %rax,%rdx
> >> > > 01d3 20e8: 83 e2 01 and $0x1,%edx
> >> > > 01d6 20eb: 48 83 c2 02 add $0x2,%rdx
> >> > > 01da 20ef: 48 ff 04 d5 00 00 00 incq 0x0(,%rdx,8)
> >> > > 01e1 20f6: 00
> >> > > 01de 20f3: R_X86_64_32S _ftrace_branch+0x148
> >> > > 01e2 20f7: 84 c0 test %al,%al
> >> > > 01e4 20f9: 75 2d jne 2128
> ><__do_sys_waitid+0x213>
> >> >
> >> > we do not take this branch and fall-through.
> >>
> >> And that is the error, I think. We should've taken it and went to:
> >>
> >> return -EFAULT;
> >>
> >> because:
> >>
> >> +1be xor %eax,%eax eax=0
> >> +1cd xor $0x1,%eax eax=1
> >> +1e2 test %al,%al 1&1==1 -> ZF=0
> >> +1e4 jnz
> >>
> >> Is an unconditional code sequence, but there's no way objtool can do
> >> that without becoming a full blown interpreter :/
> >>
> >> > > 0213 2128: 49 c7 c7 f2 ff ff ff mov
> >$0xfffffffffffffff2,%r15
> >> > > ffffffffffffe0eb }
> >> > > 021a 212f: 48 8d 65 d8 lea -0x28(%rbp),%rsp
> >> > > 021e 2133: 4c 89 f8 mov %r15,%rax
> >> > > 0221 2136: 5b pop %rbx
> >> > > 0222 2137: 41 5c pop %r12
> >> > > 0224 2139: 41 5d pop %r13
> >> > > 0226 213b: 41 5e pop %r14
> >> > > 0228 213d: 41 5f pop %r15
> >> > > 022a 213f: 5d pop %rbp
> >> > > 022b 2140: c3 retq
> >
> >This "fixes" it, and also seems to help -Os make much code:
> >
> >diff --git a/include/linux/compiler.h b/include/linux/compiler.h
> >index 445348facea9..8de63db58fdd 100644
> >--- a/include/linux/compiler.h
> >+++ b/include/linux/compiler.h
> >@@ -67,7 +67,7 @@ void ftrace_likely_update(struct ftrace_likely_data
> >*f, int val,
> > .line = __LINE__, \
> > }; \
> > ______r = !!(cond); \
> >- ______f.miss_hit[______r]++; \
> >+ if (______r) ______f.miss_hit[1]++; else ______f.miss_hit[0]++; \
> > ______r; \
> > }))
> > #endif /* CONFIG_PROFILE_ALL_BRANCHES */
>
> if (cond)? Or is ___r used elsewhere?

______r is also the return value. And it's needed because cond should
only be evaluated once.
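
A stand-alone illustration of why the single evaluation matters (not from
the thread, just userspace C showing the hand-expanded shape of the macro):

#include <stdio.h>

int main(void)
{
	int i = 0, hit = 0, miss = 0;

	/* hand-expanded equivalent of an instrumented "if (i++ < 3)" */
	for (;;) {
		int r = !!(i++ < 3);	/* evaluate the condition exactly once */
		if (r)			/* count it without re-evaluating it   */
			hit++;
		else
			miss++;
		if (!r)			/* reuse the cached value as the test  */
			break;
	}
	printf("hit=%d miss=%d\n", hit, miss);	/* prints hit=3 miss=1 */
	return 0;
}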

--
Josh

2019-03-07 17:31:32

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [PATCH 00/20] objtool: UACCESS validation v3

On March 7, 2019 9:18:29 AM PST, Josh Poimboeuf <[email protected]> wrote:
>On Thu, Mar 07, 2019 at 09:04:36AM -0800, [email protected] wrote:
>> On March 7, 2019 8:47:05 AM PST, Josh Poimboeuf <[email protected]>
>wrote:
>> >On Thu, Mar 07, 2019 at 02:13:12PM +0100, Peter Zijlstra wrote:
>> >> On Thu, Mar 07, 2019 at 01:55:26PM +0100, Peter Zijlstra wrote:
>> >> > On Thu, Mar 07, 2019 at 01:03:17PM +0100, Peter Zijlstra wrote:
>> >>
>> >>
>> >> > > 01be 20d3: 31 c0 xor %eax,%eax
>> >> > > 01c0 20d5: 4c 39 eb cmp %r13,%rbx
>> >> > > 01c3 20d8: 77 08 ja 20e2
>> ><__do_sys_waitid+0x1cd>
>> >>
>> >> randconfig-build/kernel/exit.o: warning: objtool:
>> >__do_sys_waitid()+0x1c3: (branch)
>> >>
>> >> > > 01cd 20e2: 83 f0 01 xor $0x1,%eax
>> >> > > 01d0 20e5: 48 89 c2 mov %rax,%rdx
>> >> > > 01d3 20e8: 83 e2 01 and $0x1,%edx
>> >> > > 01d6 20eb: 48 83 c2 02 add $0x2,%rdx
>> >> > > 01da 20ef: 48 ff 04 d5 00 00 00 incq 0x0(,%rdx,8)
>> >> > > 01e1 20f6: 00
>> >> > > 01de 20f3: R_X86_64_32S _ftrace_branch+0x148
>> >> > > 01e2 20f7: 84 c0 test %al,%al
>> >> > > 01e4 20f9: 75 2d jne 2128
>> ><__do_sys_waitid+0x213>
>> >> >
>> >> > we do not take this branch and fall-through.
>> >>
>> >> And that is the error, I think. We should've taken it and went to:
>> >>
>> >> return -EFAULT;
>> >>
>> >> because:
>> >>
>> >> +1be xor %eax,%eax eax=0
>> >> +1cd xor $0x1,%eax eax=1
>> >> +1e2 test %al,%al 1&1==1 -> ZF=0
>> >> +1e4 jnz
>> >>
>> >> Is an unconditional code sequence, but there's no way objtool can
>do
>> >> that without becoming a full blown interpreter :/
>> >>
>> >> > > 0213 2128: 49 c7 c7 f2 ff ff ff mov
>> >$0xfffffffffffffff2,%r15
>> >> > > ffffffffffffe0eb }
>> >> > > 021a 212f: 48 8d 65 d8 lea
>-0x28(%rbp),%rsp
>> >> > > 021e 2133: 4c 89 f8 mov %r15,%rax
>> >> > > 0221 2136: 5b pop %rbx
>> >> > > 0222 2137: 41 5c pop %r12
>> >> > > 0224 2139: 41 5d pop %r13
>> >> > > 0226 213b: 41 5e pop %r14
>> >> > > 0228 213d: 41 5f pop %r15
>> >> > > 022a 213f: 5d pop %rbp
>> >> > > 022b 2140: c3 retq
>> >
>> >This "fixes" it, and also seems to help -Os make much code:
>> >
>> >diff --git a/include/linux/compiler.h b/include/linux/compiler.h
>> >index 445348facea9..8de63db58fdd 100644
>> >--- a/include/linux/compiler.h
>> >+++ b/include/linux/compiler.h
>> >@@ -67,7 +67,7 @@ void ftrace_likely_update(struct
>ftrace_likely_data
>> >*f, int val,
>> > .line = __LINE__, \
>> > }; \
>> > ______r = !!(cond); \
>> >- ______f.miss_hit[______r]++; \
>> >+ if (______r) ______f.miss_hit[1]++; else ______f.miss_hit[0]++; \
>> > ______r; \
>> > }))
>> > #endif /* CONFIG_PROFILE_ALL_BRANCHES */
>>
>> if (cond)? Or is ___r used elsewhere?
>
>______r is also the return value. And it's needed because cond should
>only be evaluated once.

So put a true; and false; inside the if.
--
Sent from my Android device with K-9 Mail. Please excuse my brevity.

2019-03-07 17:39:27

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 00/20] objtool: UACCESS validation v3

On Thu, Mar 07, 2019 at 11:17:09AM -0600, Josh Poimboeuf wrote:
> On Thu, Mar 07, 2019 at 09:00:49AM -0800, Linus Torvalds wrote:
> > But that macro really is the macro from hell regardless.
> >
> > Do people really use CONFIG_PROFILE_ALL_BRANCHES?
>
> IIRC, Steven runs it once a year or so...

That's TRACE_BRANCH_PROFILING; PROFILE_ALL_BRANCHES I don't know, that
mostly just gets in the way I think.

Also; it seems to me that something PT, or maybe even simply:

perf -e branches -e branch-misses

would get you similar or sufficient information.

2019-03-07 17:45:40

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 00/20] objtool: UACCESS validation v3

On Thu, Mar 07, 2019 at 10:47:05AM -0600, Josh Poimboeuf wrote:

> This "fixes" it, and also seems to help -Os make much code:
>
> diff --git a/include/linux/compiler.h b/include/linux/compiler.h
> index 445348facea9..8de63db58fdd 100644
> --- a/include/linux/compiler.h
> +++ b/include/linux/compiler.h
> @@ -67,7 +67,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
> .line = __LINE__, \
> }; \
> ______r = !!(cond); \
> - ______f.miss_hit[______r]++; \
> + if (______r) ______f.miss_hit[1]++; else ______f.miss_hit[0]++; \
> ______r; \
> }))
> #endif /* CONFIG_PROFILE_ALL_BRANCHES */

Excellent; let me put the kids to bed and then I'll have a poke.

2019-03-07 17:49:18

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH 00/20] objtool: UACCESS validation v3

On Thu, Mar 07, 2019 at 09:29:17AM -0800, [email protected] wrote:
> On March 7, 2019 9:18:29 AM PST, Josh Poimboeuf <[email protected]> wrote:
> >On Thu, Mar 07, 2019 at 09:04:36AM -0800, [email protected] wrote:
> >> On March 7, 2019 8:47:05 AM PST, Josh Poimboeuf <[email protected]>
> >wrote:
> >> >On Thu, Mar 07, 2019 at 02:13:12PM +0100, Peter Zijlstra wrote:
> >> >> On Thu, Mar 07, 2019 at 01:55:26PM +0100, Peter Zijlstra wrote:
> >> >> > On Thu, Mar 07, 2019 at 01:03:17PM +0100, Peter Zijlstra wrote:
> >> >>
> >> >>
> >> >> > > 01be 20d3: 31 c0 xor %eax,%eax
> >> >> > > 01c0 20d5: 4c 39 eb cmp %r13,%rbx
> >> >> > > 01c3 20d8: 77 08 ja 20e2
> >> ><__do_sys_waitid+0x1cd>
> >> >>
> >> >> randconfig-build/kernel/exit.o: warning: objtool:
> >> >__do_sys_waitid()+0x1c3: (branch)
> >> >>
> >> >> > > 01cd 20e2: 83 f0 01 xor $0x1,%eax
> >> >> > > 01d0 20e5: 48 89 c2 mov %rax,%rdx
> >> >> > > 01d3 20e8: 83 e2 01 and $0x1,%edx
> >> >> > > 01d6 20eb: 48 83 c2 02 add $0x2,%rdx
> >> >> > > 01da 20ef: 48 ff 04 d5 00 00 00 incq 0x0(,%rdx,8)
> >> >> > > 01e1 20f6: 00
> >> >> > > 01de 20f3: R_X86_64_32S _ftrace_branch+0x148
> >> >> > > 01e2 20f7: 84 c0 test %al,%al
> >> >> > > 01e4 20f9: 75 2d jne 2128
> >> ><__do_sys_waitid+0x213>
> >> >> >
> >> >> > we do not take this branch and fall-through.
> >> >>
> >> >> And that is the error, I think. We should've taken it and went to:
> >> >>
> >> >> return -EFAULT;
> >> >>
> >> >> because:
> >> >>
> >> >> +1be xor %eax,%eax eax=0
> >> >> +1cd xor $0x1,%eax eax=1
> >> >> +1e2 test %al,%al 1&1==1 -> ZF=0
> >> >> +1e4 jnz
> >> >>
> >> >> Is an unconditional code sequence, but there's no way objtool can
> >do
> >> >> that without becoming a full blown interpreter :/
> >> >>
> >> >> > > 0213 2128: 49 c7 c7 f2 ff ff ff mov
> >> >$0xfffffffffffffff2,%r15
> >> >> > > ffffffffffffe0eb }
> >> >> > > 021a 212f: 48 8d 65 d8 lea
> >-0x28(%rbp),%rsp
> >> >> > > 021e 2133: 4c 89 f8 mov %r15,%rax
> >> >> > > 0221 2136: 5b pop %rbx
> >> >> > > 0222 2137: 41 5c pop %r12
> >> >> > > 0224 2139: 41 5d pop %r13
> >> >> > > 0226 213b: 41 5e pop %r14
> >> >> > > 0228 213d: 41 5f pop %r15
> >> >> > > 022a 213f: 5d pop %rbp
> >> >> > > 022b 2140: c3 retq
> >> >
> >> >This "fixes" it, and also seems to help -Os make much code:
> >> >
> >> >diff --git a/include/linux/compiler.h b/include/linux/compiler.h
> >> >index 445348facea9..8de63db58fdd 100644
> >> >--- a/include/linux/compiler.h
> >> >+++ b/include/linux/compiler.h
> >> >@@ -67,7 +67,7 @@ void ftrace_likely_update(struct
> >ftrace_likely_data
> >> >*f, int val,
> >> > .line = __LINE__, \
> >> > }; \
> >> > ______r = !!(cond); \
> >> >- ______f.miss_hit[______r]++; \
> >> >+ if (______r) ______f.miss_hit[1]++; else ______f.miss_hit[0]++; \
> >> > ______r; \
> >> > }))
> >> > #endif /* CONFIG_PROFILE_ALL_BRANCHES */
> >>
> >> if (cond)? Or is ___r used elsewhere?
> >
> >______r is also the return value. And it's needed because cond should
> >only be evaluated once.
>
> So put a true; and false; inside the if.

Is that possible to do in a C macro? Doesn't seem to work for me...

--
Josh

2019-03-07 17:49:21

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH 00/20] objtool: UACCESS validation v3

On Thu, Mar 07, 2019 at 06:43:22PM +0100, Peter Zijlstra wrote:
> On Thu, Mar 07, 2019 at 10:47:05AM -0600, Josh Poimboeuf wrote:
>
> > This "fixes" it, and also seems to help -Os make much code:
> >
> > diff --git a/include/linux/compiler.h b/include/linux/compiler.h
> > index 445348facea9..8de63db58fdd 100644
> > --- a/include/linux/compiler.h
> > +++ b/include/linux/compiler.h
> > @@ -67,7 +67,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
> > .line = __LINE__, \
> > }; \
> > ______r = !!(cond); \
> > - ______f.miss_hit[______r]++; \
> > + if (______r) ______f.miss_hit[1]++; else ______f.miss_hit[0]++; \
> > ______r; \
> > }))
> > #endif /* CONFIG_PROFILE_ALL_BRANCHES */
>
> Excellent; let me put the kids to bed and then I'll have a poke.

Here's a proper patch.

From: Josh Poimboeuf <[email protected]>
Subject: [PATCH] tracing: Improve "if" macro code generation

With CONFIG_PROFILE_ALL_BRANCHES, the "if" macro converts the
conditional to an array index. This can cause GCC to create horrible
code. When there are nested ifs, the generated code uses register
values to encode branching decisions.

Make it easier for GCC to optimize by keeping the conditional as a
conditional rather than converting it to an integer. This shrinks the
generated code quite a bit, and also makes the code sane enough for
objtool to understand.

Reported-by: Peter Zijlstra <[email protected]>
Signed-off-by: Josh Poimboeuf <[email protected]>
---
include/linux/compiler.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 445348facea9..d58aa0db05f9 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -67,7 +67,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
.line = __LINE__, \
}; \
______r = !!(cond); \
- ______f.miss_hit[______r]++; \
+ ______r ? ______f.miss_hit[1]++ : ______f.miss_hit[0]++;\
______r; \
}))
#endif /* CONFIG_PROFILE_ALL_BRANCHES */
--
2.17.2


2019-03-07 17:49:57

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCH 00/20] objtool: UACCESS validation v3

On Thu, Mar 7, 2019 at 9:46 AM Josh Poimboeuf <[email protected]> wrote:
>
> Is that possible to do in a C macro? Doesn't seem to work for me...

The meat of that macro could easily be done as a helper inline function.
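
Something like this, purely as a sketch (the helper name is made up; struct
ftrace_branch_data is the existing one with the miss_hit[2] array):

static __always_inline int __branch_profile(struct ftrace_branch_data *f, int cond)
{
	if (cond)
		f->miss_hit[1]++;
	else
		f->miss_hit[0]++;
	return cond;
}

The macro body would then shrink to defining ______f and doing
__branch_profile(&______f, !!(cond)), still evaluating cond exactly once.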

But as mentioned, I think a better option would be to remove it
entirely, if at all possible.

The best part about that config option is the comment, and while cute
I don't think that's really worth saving it for...

Linus

2019-03-07 17:49:58

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCH 00/20] objtool: UACCESS validation v3

On Thu, Mar 7, 2019 at 9:38 AM Peter Zijlstra <[email protected]> wrote:
>
> Also; it seems to me that something PT, or maybe even simply:
>
> perf -e branches -e branch-misses
>
> would get you similar or sufficient information.

Yeah, I'm not really seeing a lot of upside to PROFILE_ALL_BRANCHES.

Particularly since it doesn't actually profile all branches at all. It
only basically profiles "if ()" statements, which obviously misses
loops etc, but then also _does_ hit things where people turned loops
into "if (unlikely()) loop()", which happens in (for example)
low-level locking code etc that often has a fast-case "first try"
thing followed by a slow-case "ok, let's loop for it" thing.
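
The shape I mean, as a purely illustrative sketch (hypothetical helper names):

extern void my_lock_slowpath(atomic_t *l);	/* hypothetical, loops until acquired */

static inline void my_lock(atomic_t *l)
{
	if (unlikely(atomic_cmpxchg(l, 0, 1) != 0))	/* fast path: one try  */
		my_lock_slowpath(l);			/* slow path: the loop */
}

The if () wrapper gets counted while a plain while () loop would not, which
is what makes the coverage so random.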

So I think PROFILE_ALL_BRANCHES tends to have very random coverage.
I'd love to get rid of it, because it seems so random.

Linus

2019-03-07 18:19:31

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH 00/20] objtool: UACCESS validation v3

On Thu, 7 Mar 2019 09:45:35 -0800
Linus Torvalds <[email protected]> wrote:

> On Thu, Mar 7, 2019 at 9:38 AM Peter Zijlstra <[email protected]> wrote:
> >
> > Also; it seems to me that something PT, or maybe even simply:
> >
> > perf -e branches -e branch-misses
> >
> > would get you similar or sufficient information.
>
> Yeah, I'm not really seeing a lot of upside to PROFILE_ALL_BRANCHES.
>
> Particularly since it doesn't actually profile all branches at all. It
> only basically profiles "if ()" statements, which obviously misses
> loops etc, but then also _does_ hit things where people turned loops
> into "if (unlikely()) loop()", which happens in (for example)
> low-level locking code etc that often has a fast-case "first try"
> thing followed by a slow-case "ok, let's loop for it" thing.
>
> So I think PROFILE_ALL_BRANCHES tends to have very random coverage.
> I'd love to get rid of it, because it seems so random.
>

As Josh said, I run it once a year on two production (real use)
machines for 2 to 4 weeks and collect the data to see if there are
places that can be optimized better.

I currently have one of my engineers looking at the data and may be
sending patches soon. It's basically an entry level way to get into
kernel development. Note, no patch will be sent just because of the
data from the profiling. The task is to look at and understand the
code, and see if it can be optimized (with likely/unlikely or flow
changes). It's a way to get a better understanding of the kernel in
various locations. It is by no means "profiler said this, let's change
it." All changes must be rational, and make sense. The profiler is only
used to help find those places.

The data that was run at the end of January can be found here:

http://rostedt.homelinux.com/branches/mammoth-branches-2019/
http://rostedt.homelinux.com/branches/gandalf-branches-2019/

-- Steve

2019-03-10 13:18:55

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 00/20] objtool: UACCESS validation v3

On Thu, Mar 07, 2019 at 01:18:41PM -0500, Steven Rostedt wrote:
> On Thu, 7 Mar 2019 09:45:35 -0800
> Linus Torvalds <[email protected]> wrote:
>
> > On Thu, Mar 7, 2019 at 9:38 AM Peter Zijlstra <[email protected]> wrote:
> > >
> > > Also; it seems to me that something PT, or maybe even simply:
> > >
> > > perf -e branches -e branch-misses
> > >
> > > would get you similar or sufficient information.

> I currently have one of my engineers looking at the data and may be
> sending patches soon. It's basically an entry level way to get into
> kernel development. Note, no patch will be sent just because of the
> data from the profiling. The task is to look at and understand the
> code, and see if it can be optimized (with likely/unlikely or flow
> changes). It's a way to get a better understanding of the kernel in
> various locations. It is by no means "profiler said this, lets change
> it." All changes must be rational, and make sense. The profiler is only
> used to help find those places.

Can't you just have those same engineers look at perf data? This seems
like a very expensive and convoluted way of getting something.


Subject: [tip:core/objtool] tracing: Improve "if" macro code generation

Commit-ID: 37686b1353cfc30e127cef811959cdbcd0495d98
Gitweb: https://git.kernel.org/tip/37686b1353cfc30e127cef811959cdbcd0495d98
Author: Josh Poimboeuf <[email protected]>
AuthorDate: Thu, 7 Mar 2019 11:48:02 -0600
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 3 Apr 2019 09:36:27 +0200

tracing: Improve "if" macro code generation

With CONFIG_PROFILE_ALL_BRANCHES=y, the "if" macro converts the
conditional to an array index. This can cause GCC to create horrible
code. When there are nested ifs, the generated code uses register
values to encode branching decisions.

Make it easier for GCC to optimize by keeping the conditional as a
conditional rather than converting it to an integer. This shrinks the
generated code quite a bit, and also makes the code sane enough for
objtool to understand.

Reported-by: Peter Zijlstra <[email protected]>
Signed-off-by: Josh Poimboeuf <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Link: https://lkml.kernel.org/r/20190307174802.46fmpysxyo35hh43@treble
Signed-off-by: Ingo Molnar <[email protected]>
---
include/linux/compiler.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 445348facea9..d58aa0db05f9 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -67,7 +67,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
.line = __LINE__, \
}; \
______r = !!(cond); \
- ______f.miss_hit[______r]++; \
+ ______r ? ______f.miss_hit[1]++ : ______f.miss_hit[0]++;\
______r; \
}))
#endif /* CONFIG_PROFILE_ALL_BRANCHES */

2019-04-19 18:38:19

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH] compiler.h, tracing: Remove CONFIG_PROFILE_ALL_BRANCHES

On Fri, 19 Apr 2019 12:08:37 +0200
Ingo Molnar <[email protected]> wrote:

> > Can't you just have those same engineers look at perf data? This seems
> > like a very expensive and convoluted way of getting something.

I haven't tried the perf data. How well does it work when running over
a two-week to one-month period? That's what I do yearly. Here are the
results of my last run:

http://rostedt.homelinux.com/branches/gandalf-branches-2019/brach_all-2019-02-05

http://rostedt.homelinux.com/branches/mammoth-branches-2019/branch_all-2019-01-02
http://rostedt.homelinux.com/branches/mammoth-branches-2019/branch_all-2019-01-03
http://rostedt.homelinux.com/branches/mammoth-branches-2019/branch_all-2019-01-17
http://rostedt.homelinux.com/branches/mammoth-branches-2019/branch_all-2019-02-05

I have a cron job that runs nightly that copies the current state, and
if the machine reboots, it starts a new file (which is why there's
multiple files for mammoth - it rebooted).

>
> So since no-one offered objections to using perf branch profiling instead
> (which method allows so much more than CONFIG_PROFILE_ALL_BRANCHES: such

I've never used it, so I have no idea if it is suitable or not.

> as profiling glibc and other user-space, or allowing to branch-profile
> the kernel in an uninstrumented form not distorted by
> CONFIG_PROFILE_ALL_BRANCHES code generation artifacts), lemme propose the
> attached patch to remove if-tracing.
>
> If the CONFIG_PROFILE_ALL_BRANCHES=y feature is required for anyone it
> can still be reverted privately or maintained out of tree - no need to
> burden the mainline kernel with this.

But is it a real burden? It's been in the kernel for over 10 years
with very little issue. Only when we do something drastic does it show
up, and it's usually a quick fix to get it working again.

I believe Josh even told me that it found a bug in the objtool code, so
there is still a benefit to keeping it in the kernel even without people
using it for profiling.

Note, I'm in the middle of writing a LWN article about learning the
kernel from branch profiling and it would be a shame if it disappears
before I finish it.

-- Steve


>
> I've build tested this and it Looks Perfect Here™.


2019-04-19 19:13:39

by Ingo Molnar

[permalink] [raw]
Subject: [PATCH] compiler.h, tracing: Remove CONFIG_PROFILE_ALL_BRANCHES


* Peter Zijlstra <[email protected]> wrote:

> On Thu, Mar 07, 2019 at 01:18:41PM -0500, Steven Rostedt wrote:
> > On Thu, 7 Mar 2019 09:45:35 -0800
> > Linus Torvalds <[email protected]> wrote:
> >
> > > On Thu, Mar 7, 2019 at 9:38 AM Peter Zijlstra <[email protected]> wrote:
> > > >
> > > > Also; it seems to me that something PT, or maybe even simply:
> > > >
> > > > perf -e branches -e branch-misses
> > > >
> > > > would get you similar or sufficient information.
>
> > I currently have one of my engineers looking at the data and may be
> > sending patches soon. It's basically an entry level way to get into
> > kernel development. Note, no patch will be sent just because of the
> > data from the profiling. The task is to look at and understand the
> > code, and see if it can be optimized (with likely/unlikely or flow
> > changes). It's a way to get a better understanding of the kernel in
> > various locations. It is by no means "profiler said this, lets change
> > it." All changes must be rational, and make sense. The profiler is only
> > used to help find those places.
>
> Can't you just have those same engineers look at perf data? This seems
> like a very expensive and convoluted way of getting something.

So since no-one offered objections to using perf branch profiling instead
(which method allows so much more than CONFIG_PROFILE_ALL_BRANCHES: such
as profiling glibc and other user-space, or allowing to branch-profile
the kernel in an uninstrumented form not distorted by
CONFIG_PROFILE_ALL_BRANCHES code generation artifacts), lemme propose the
attached patch to remove if-tracing.

If the CONFIG_PROFILE_ALL_BRANCHES=y feature is required for anyone it
can still be reverted privately or maintained out of tree - no need to
burden the mainline kernel with this.

I've build tested this and it Looks Perfect Here™.

Thanks,

Ingo

=============================>
From 3f689ed8a1555aabead90e015a47aefddd2a4e25 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <[email protected]>
Date: Fri, 19 Apr 2019 11:42:43 +0200
Subject: [PATCH] compiler.h, tracing: Remove CONFIG_PROFILE_ALL_BRANCHES

Redefining 'if' in compiler.h was a hideously wonderful hack back in 2008 when
we merged it via:

2bcd521a684c: ("trace: profile all if conditionals")

Meanwhile the 'wonderful' novelty part wore off a bit in that decade, and
what remained is the 'hideous', mostly. ;-)

Meanwhile #2: we also merged perf and hardware branch tracing
capabilities (branch-miss events, but also BTS and aux hw tracing),
which can collect similar data and so much more:

$ perf -e branches -e branch-misses

So let's remove this constant source of headaches for good. Anyone truly
interested in this feature can revert this commit and/or maintain it out
of tree - but the upstream pain isn't really worth it.

Signed-off-by: Ingo Molnar <[email protected]>
---
include/asm-generic/vmlinux.lds.h | 11 +----
include/linux/compiler.h | 24 -----------
kernel/trace/Kconfig | 17 --------
kernel/trace/trace_branch.c | 66 -----------------------------
tools/perf/tests/bpf-script-test-prologue.c | 9 ----
5 files changed, 1 insertion(+), 126 deletions(-)

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index f8f6f04c4453..9c477b2136c2 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -126,14 +126,6 @@
#define LIKELY_PROFILE()
#endif

-#ifdef CONFIG_PROFILE_ALL_BRANCHES
-#define BRANCH_PROFILE() __start_branch_profile = .; \
- KEEP(*(_ftrace_branch)) \
- __stop_branch_profile = .;
-#else
-#define BRANCH_PROFILE()
-#endif
-
#ifdef CONFIG_KPROBES
#define KPROBE_BLACKLIST() . = ALIGN(8); \
__start_kprobe_blacklist = .; \
@@ -266,8 +258,7 @@
__start___verbose = .; \
KEEP(*(__verbose)) \
__stop___verbose = .; \
- LIKELY_PROFILE() \
- BRANCH_PROFILE() \
+ LIKELY_PROFILE() \
TRACE_PRINTKS() \
BPF_RAW_TP() \
TRACEPOINT_STR()
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index d58aa0db05f9..c63105451c6a 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -48,30 +48,6 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
# define unlikely(x) (__branch_check__(x, 0, __builtin_constant_p(x)))
# endif

-#ifdef CONFIG_PROFILE_ALL_BRANCHES
-/*
- * "Define 'is'", Bill Clinton
- * "Define 'if'", Steven Rostedt
- */
-#define if(cond, ...) __trace_if( (cond , ## __VA_ARGS__) )
-#define __trace_if(cond) \
- if (__builtin_constant_p(!!(cond)) ? !!(cond) : \
- ({ \
- int ______r; \
- static struct ftrace_branch_data \
- __aligned(4) \
- __section("_ftrace_branch") \
- ______f = { \
- .func = __func__, \
- .file = __FILE__, \
- .line = __LINE__, \
- }; \
- ______r = !!(cond); \
- ______r ? ______f.miss_hit[1]++ : ______f.miss_hit[0]++;\
- ______r; \
- }))
-#endif /* CONFIG_PROFILE_ALL_BRANCHES */
-
#else
# define likely(x) __builtin_expect(!!(x), 1)
# define unlikely(x) __builtin_expect(!!(x), 0)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 8bd1d6d001d7..169c34e0f16d 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -366,23 +366,6 @@ config PROFILE_ANNOTATED_BRANCHES

Note: this will add a significant overhead; only turn this
on if you need to profile the system's use of these macros.
-
-config PROFILE_ALL_BRANCHES
- bool "Profile all if conditionals" if !FORTIFY_SOURCE
- select TRACE_BRANCH_PROFILING
- imply CC_DISABLE_WARN_MAYBE_UNINITIALIZED # avoid false positives
- help
- This tracer profiles all branch conditions. Every if ()
- taken in the kernel is recorded whether it hit or miss.
- The results will be displayed in:
-
- /sys/kernel/debug/tracing/trace_stat/branch_all
-
- This option also enables the likely/unlikely profiler.
-
- This configuration, when enabled, will impose a great overhead
- on the system. This should only be enabled when the system
- is to be analyzed in much detail.
endchoice

config TRACING_BRANCHES
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 3ea65cdff30d..be75301a9963 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -387,69 +387,3 @@ __init static int init_annotated_branch_stats(void)
return 0;
}
fs_initcall(init_annotated_branch_stats);
-
-#ifdef CONFIG_PROFILE_ALL_BRANCHES
-
-extern unsigned long __start_branch_profile[];
-extern unsigned long __stop_branch_profile[];
-
-static int all_branch_stat_headers(struct seq_file *m)
-{
- seq_puts(m, " miss hit % "
- " Function "
- " File Line\n"
- " ------- --------- - "
- " -------- "
- " ---- ----\n");
- return 0;
-}
-
-static void *all_branch_stat_start(struct tracer_stat *trace)
-{
- return __start_branch_profile;
-}
-
-static void *
-all_branch_stat_next(void *v, int idx)
-{
- struct ftrace_branch_data *p = v;
-
- ++p;
-
- if ((void *)p >= (void *)__stop_branch_profile)
- return NULL;
-
- return p;
-}
-
-static int all_branch_stat_show(struct seq_file *m, void *v)
-{
- struct ftrace_branch_data *p = v;
- const char *f;
-
- f = branch_stat_process_file(p);
- return branch_stat_show_normal(m, p, f);
-}
-
-static struct tracer_stat all_branch_stats = {
- .name = "branch_all",
- .stat_start = all_branch_stat_start,
- .stat_next = all_branch_stat_next,
- .stat_headers = all_branch_stat_headers,
- .stat_show = all_branch_stat_show
-};
-
-__init static int all_annotated_branch_stats(void)
-{
- int ret;
-
- ret = register_stat_tracer(&all_branch_stats);
- if (!ret) {
- printk(KERN_WARNING "Warning: could not register "
- "all branches stats\n");
- return 1;
- }
- return 0;
-}
-fs_initcall(all_annotated_branch_stats);
-#endif /* CONFIG_PROFILE_ALL_BRANCHES */
diff --git a/tools/perf/tests/bpf-script-test-prologue.c b/tools/perf/tests/bpf-script-test-prologue.c
index 43f1e16486f4..1f048bd89b0d 100644
--- a/tools/perf/tests/bpf-script-test-prologue.c
+++ b/tools/perf/tests/bpf-script-test-prologue.c
@@ -10,15 +10,6 @@

#include <uapi/linux/fs.h>

-/*
- * If CONFIG_PROFILE_ALL_BRANCHES is selected,
- * 'if' is redefined after include kernel header.
- * Recover 'if' for BPF object code.
- */
-#ifdef if
-# undef if
-#endif
-
#define FMODE_READ 0x1
#define FMODE_WRITE 0x2