This patch kit implements clearing of all unused registers on kernel entries,
including system calls and all exceptions and interrupts.
This doesn't fix any known issue, but will make it harder in general
to exploit the kernel with speculation because it will be harder
to get user controlled values into kernel code.
The patchkit is a bit more complicated because it attempts to clear
unused argument registers, which on 64bit requires knowing how
many arguments each system call has. I used some scripting
to derive the number of arguments of each system call from the
SYSCALL_DEFINE*s and add it to the x86 system call tables.
Everything else is relatively simple and straightforward,
and could be used independently.
I assume this mostly isn't 4.15 material, but should be considered for 4.16.
Possibly some of the simpler patches could be considered for 4.15.
Original patches were from Tim Chen, but changed significantly
by AK.
git://git.kernel.org/pub/scm/linux/kernel/git/ak/linux-misc.git spec/clearregs-1
v1: Initial post
From: Andi Kleen <[email protected]>
Remove the partial stack frame in the 64bit syscall fast path.
In the next patch we want to clear the extra registers, which requires
always saving all registers. So remove the partial stack frame
in the syscall fast path and always save everything.
This actually simplifies the code because the ptregs stubs
are not needed anymore.
arch/x86/entry/entry_64.S | 57 ++++-----------------------------------------------------
arch/x86/entry/syscall_64.c | 2 +-
Signed-off-by: Andi Kleen <[email protected]>
---
arch/x86/entry/entry_64.S | 57 ++++-----------------------------------------
arch/x86/entry/syscall_64.c | 2 +-
2 files changed, 5 insertions(+), 54 deletions(-)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 58dbf7a12a05..bbdfbdd817d6 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -234,7 +234,9 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
pushq %r9 /* pt_regs->r9 */
pushq %r10 /* pt_regs->r10 */
pushq %r11 /* pt_regs->r11 */
- sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
+ sub $(6*8), %rsp
+ SAVE_EXTRA_REGS
+
UNWIND_HINT_REGS extra=0
/*
@@ -262,11 +264,6 @@ entry_SYSCALL_64_fastpath:
ja 1f /* return -ENOSYS (already in pt_regs->ax) */
movq %r10, %rcx
- /*
- * This call instruction is handled specially in stub_ptregs_64.
- * It might end up jumping to the slow path. If it jumps, RAX
- * and all argument registers are clobbered.
- */
#ifdef CONFIG_RETPOLINE
movq sys_call_table(, %rax, 8), %rax
call __x86_indirect_thunk_rax
@@ -293,9 +290,7 @@ entry_SYSCALL_64_fastpath:
TRACE_IRQS_ON /* user mode is traced as IRQs on */
movq RIP(%rsp), %rcx
movq EFLAGS(%rsp), %r11
- addq $6*8, %rsp /* skip extra regs -- they were preserved */
- UNWIND_HINT_EMPTY
- jmp .Lpop_c_regs_except_rcx_r11_and_sysret
+ jmp syscall_return_via_sysret
1:
/*
@@ -305,14 +300,12 @@ entry_SYSCALL_64_fastpath:
*/
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_ANY)
- SAVE_EXTRA_REGS
movq %rsp, %rdi
call syscall_return_slowpath /* returns with IRQs disabled */
jmp return_from_SYSCALL_64
entry_SYSCALL64_slow_path:
/* IRQs are off. */
- SAVE_EXTRA_REGS
movq %rsp, %rdi
call do_syscall_64 /* returns with IRQs disabled */
@@ -389,7 +382,6 @@ syscall_return_via_sysret:
/* rcx and r11 are already restored (see code above) */
UNWIND_HINT_EMPTY
POP_EXTRA_REGS
-.Lpop_c_regs_except_rcx_r11_and_sysret:
popq %rsi /* skip r11 */
popq %r10
popq %r9
@@ -420,47 +412,6 @@ syscall_return_via_sysret:
USERGS_SYSRET64
END(entry_SYSCALL_64)
-ENTRY(stub_ptregs_64)
- /*
- * Syscalls marked as needing ptregs land here.
- * If we are on the fast path, we need to save the extra regs,
- * which we achieve by trying again on the slow path. If we are on
- * the slow path, the extra regs are already saved.
- *
- * RAX stores a pointer to the C function implementing the syscall.
- * IRQs are on.
- */
- cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp)
- jne 1f
-
- /*
- * Called from fast path -- disable IRQs again, pop return address
- * and jump to slow path
- */
- DISABLE_INTERRUPTS(CLBR_ANY)
- TRACE_IRQS_OFF
- popq %rax
- UNWIND_HINT_REGS extra=0
- jmp entry_SYSCALL64_slow_path
-
-1:
- JMP_NOSPEC %rax /* Called from C */
-END(stub_ptregs_64)
-
-.macro ptregs_stub func
-ENTRY(ptregs_\func)
- UNWIND_HINT_FUNC
- leaq \func(%rip), %rax
- jmp stub_ptregs_64
-END(ptregs_\func)
-.endm
-
-/* Instantiate ptregs_stub for each ptregs-using syscall */
-#define __SYSCALL_64_QUAL_(sym)
-#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym
-#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
-#include <asm/syscalls_64.h>
-
/*
* %rdi: prev task
* %rsi: next task
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index 9c09775e589d..ad1ae014f943 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -8,7 +8,7 @@
#include <asm/syscall.h>
#define __SYSCALL_64_QUAL_(sym) sym
-#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym
+#define __SYSCALL_64_QUAL_ptregs(sym) sym
#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
#include <asm/syscalls_64.h>
--
2.14.3
From: Andi Kleen <[email protected]>
Add 64bit assembler macros to clear registers on kernel entry.
Used in follow-on patches.
Signed-off-by: Andi Kleen <[email protected]>
---
arch/x86/entry/calling.h | 28 ++++++++++++++++++++++++++++
1 file changed, 28 insertions(+)
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 45a63e00a6af..9444e7623185 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -172,6 +172,34 @@ For 32-bit we have the following conventions - kernel is built with
.byte 0xf1
.endm
+ .macro CLEAR_R11_TO_R15
+ xorq %r15, %r15
+ xorq %r14, %r14
+ xorq %r13, %r13
+ xorq %r12, %r12
+ xorq %r11, %r11
+ .endm
+
+ .macro CLEAR_R8_TO_R15
+ CLEAR_R11_TO_R15
+ xorq %r10, %r10
+ xorq %r9, %r9
+ xorq %r8, %r8
+ .endm
+
+ .macro CLEAR_ALL_REGS
+ CLEAR_R8_TO_R15
+ xorl %eax, %eax
+ xorl %ebx, %ebx
+ xorl %ecx, %ecx
+ xorl %edx, %edx
+ xorl %esi, %esi
+ xorl %edi, %edi
+#ifndef CONFIG_FRAME_POINTER
+ xorl %ebp, %ebp
+#endif
+ .endm
+
/*
* This is a sneaky trick to help the unwinder find pt_regs on the stack. The
* frame pointer is replaced with an encoded pointer to pt_regs. The encoding
--
2.14.3
From: Andi Kleen <[email protected]>
The main system call code doesn't know how many arguments each
system call has. So generate stubs that do the clearing.
Set up macros to generate stubs to clear unused argument registers
for each system call in a 64bit kernel. This uses the syscall
argument count from the syscall tables added earlier.
Each system call will run through its stub which then clears
the registers not used for input arguments before jumping
to the real system calls. It also clears RAX.
We have to move all the __SYSCALL_* users atomically.
This is a larger patch, but it's difficult to keep it
git bisect safe otherwise.
Longer term this setup will also allow getting rid
of the system call table, as it will be possible
to compute the entry point with a simple shift.
So far this is not done here.
Signed-off-by: Andi Kleen <[email protected]>
---
arch/x86/entry/calling.h | 24 ++++++++++++++++++++++++
arch/x86/entry/entry_64.S | 15 +++++++++++++++
arch/x86/entry/syscall_32.c | 4 ++--
arch/x86/entry/syscall_64.c | 5 +++--
arch/x86/entry/syscalls/syscalltbl.sh | 15 ++++++++-------
arch/x86/kernel/asm-offsets_32.c | 2 +-
arch/x86/kernel/asm-offsets_64.c | 4 ++--
arch/x86/um/sys_call_table_32.c | 4 ++--
arch/x86/um/sys_call_table_64.c | 4 ++--
9 files changed, 59 insertions(+), 18 deletions(-)
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 9444e7623185..c89a8a8d195c 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -200,6 +200,30 @@ For 32-bit we have the following conventions - kernel is built with
#endif
.endm
+ /* Clear unused argument registers */
+
+.macro CLEAR_ARGS num
+ /* we leave EAX around because it has been already checked */
+ .if \num < 6
+ xorq %r9, %r9 # arg6
+ .endif
+ .if \num < 5
+ xorq %r8, %r8 # arg5
+ .endif
+ .if \num < 4
+ xorl %ecx, %ecx # arg4
+ .endif
+ .if \num < 3
+ xorl %edx, %edx # arg3
+ .endif
+ .if \num < 2
+ xorl %esi, %esi # arg2
+ .endif
+ .if \num < 1
+ xorl %edi, %edi # arg1
+ .endif
+.endm
+
/*
* This is a sneaky trick to help the unwinder find pt_regs on the stack. The
* frame pointer is replaced with an encoded pointer to pt_regs. The encoding
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 6ab4c2aaeabb..5b2456a30b17 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1755,6 +1755,21 @@ nmi_restore:
iretq
END(nmi)
+/*
+ * Clear all argument registers not used by a system call.
+ */
+
+.macro gen_arg_stub sym, num
+ENTRY(san_args_\sym)
+ CLEAR_ARGS \num
+ xor %eax, %eax
+ jmp \sym
+END(san_args_\sym)
+.endm
+
+#define __SYSCALL_64(nr, sym, qual, num) gen_arg_stub sym, num
+#include <asm/syscalls_64.h>
+
ENTRY(ignore_sysret)
UNWIND_HINT_EMPTY
mov $-ENOSYS, %eax
diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
index 95c294963612..b31e5c8b7ba7 100644
--- a/arch/x86/entry/syscall_32.c
+++ b/arch/x86/entry/syscall_32.c
@@ -7,11 +7,11 @@
#include <asm/asm-offsets.h>
#include <asm/syscall.h>
-#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
+#define __SYSCALL_I386(nr, sym, qual, num) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
#include <asm/syscalls_32.h>
#undef __SYSCALL_I386
-#define __SYSCALL_I386(nr, sym, qual) [nr] = sym,
+#define __SYSCALL_I386(nr, sym, qual, num) [nr] = sym,
extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index ad1ae014f943..963c9c14480f 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -10,11 +10,12 @@
#define __SYSCALL_64_QUAL_(sym) sym
#define __SYSCALL_64_QUAL_ptregs(sym) sym
-#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
+#define __SYSCALL_64(nr, sym, qual, num) \
+ extern asmlinkage long __SYSCALL_64_QUAL_##qual(san_args_##sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
#include <asm/syscalls_64.h>
#undef __SYSCALL_64
-#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym),
+#define __SYSCALL_64(nr, sym, qual, num) [nr] = __SYSCALL_64_QUAL_##qual(san_args_##sym),
extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
diff --git a/arch/x86/entry/syscalls/syscalltbl.sh b/arch/x86/entry/syscalls/syscalltbl.sh
index bb8a12f32610..79fff684d75e 100644
--- a/arch/x86/entry/syscalls/syscalltbl.sh
+++ b/arch/x86/entry/syscalls/syscalltbl.sh
@@ -18,7 +18,7 @@ syscall_macro() {
qualifier=${entry#*/}
fi
- echo "__SYSCALL_${abi}($nr, $real_entry, $qualifier)"
+ echo "__SYSCALL_${abi}($nr, $real_entry, $qualifier, $num)"
}
emit() {
@@ -26,6 +26,7 @@ emit() {
nr="$2"
entry="$3"
compat="$4"
+ num="$5"
if [ "$abi" = "64" -a -n "$compat" ]; then
echo "a compat entry for a 64-bit syscall makes no sense" >&2
@@ -34,15 +35,15 @@ emit() {
if [ -z "$compat" ]; then
if [ -n "$entry" ]; then
- syscall_macro "$abi" "$nr" "$entry"
+ syscall_macro "$abi" "$nr" "$entry" "$num"
fi
else
echo "#ifdef CONFIG_X86_32"
if [ -n "$entry" ]; then
- syscall_macro "$abi" "$nr" "$entry"
+ syscall_macro "$abi" "$nr" "$entry" "$num"
fi
echo "#else"
- syscall_macro "$abi" "$nr" "$compat"
+ syscall_macro "$abi" "$nr" "$compat" "$num"
echo "#endif"
fi
}
@@ -58,14 +59,14 @@ grep '^[0-9]' "$in" | sort -n | (
# COMMON is the same as 64, except that we don't expect X32
# programs to use it. Our expectation has nothing to do with
# any generated code, so treat them the same.
- emit 64 "$nr" "$entry" "$compat"
+ emit 64 "$nr" "$entry" "$compat" "$num"
elif [ "$abi" = "X32" ]; then
# X32 is equivalent to 64 on an X32-compatible kernel.
echo "#ifdef CONFIG_X86_X32_ABI"
- emit 64 "$nr" "$entry" "$compat"
+ emit 64 "$nr" "$entry" "$compat" "$num"
echo "#endif"
elif [ "$abi" = "I386" ]; then
- emit "$abi" "$nr" "$entry" "$compat"
+ emit "$abi" "$nr" "$entry" "$compat" "$num"
else
echo "Unknown abi $abi" >&2
exit 1
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index fa1261eefa16..13c7478bfe57 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -5,7 +5,7 @@
#include <asm/ucontext.h>
-#define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
+#define __SYSCALL_I386(nr, sym, qual, num) [nr] = 1,
static char syscalls[] = {
#include <asm/syscalls_32.h>
};
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index bf51e51d808d..75d92b53240d 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -5,11 +5,11 @@
#include <asm/ia32.h>
-#define __SYSCALL_64(nr, sym, qual) [nr] = 1,
+#define __SYSCALL_64(nr, sym, qual, num) [nr] = 1,
static char syscalls_64[] = {
#include <asm/syscalls_64.h>
};
-#define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
+#define __SYSCALL_I386(nr, sym, qual, num) [nr] = 1,
static char syscalls_ia32[] = {
#include <asm/syscalls_32.h>
};
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index 9649b5ad2ca2..50002d938cef 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c
@@ -26,11 +26,11 @@
#define old_mmap sys_old_mmap
-#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
+#define __SYSCALL_I386(nr, sym, qual, num) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
#include <asm/syscalls_32.h>
#undef __SYSCALL_I386
-#define __SYSCALL_I386(nr, sym, qual) [ nr ] = sym,
+#define __SYSCALL_I386(nr, sym, qual, num) [ nr ] = sym,
extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c
index c8bc7fb8cbd6..c39c5b3b8022 100644
--- a/arch/x86/um/sys_call_table_64.c
+++ b/arch/x86/um/sys_call_table_64.c
@@ -36,11 +36,11 @@
#define stub_execveat sys_execveat
#define stub_rt_sigreturn sys_rt_sigreturn
-#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
+#define __SYSCALL_64(nr, sym, qual, num) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
#include <asm/syscalls_64.h>
#undef __SYSCALL_64
-#define __SYSCALL_64(nr, sym, qual) [ nr ] = sym,
+#define __SYSCALL_64(nr, sym, qual, num) [ nr ] = sym,
extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
--
2.14.3
From: Andi Kleen <[email protected]>
In order to sanitize the system call arguments properly
we need to know the number of syscall arguments for each
syscall. Add a new column to the 32bit and 64bit syscall
tables to list the number of arguments.
Also fix the generation script to not confuse the number
with a compat entry.
Generated with some scripting and quick review (but more eyeballs
would be appreciated)
Signed-off-by: Andi Kleen <[email protected]>
---
arch/x86/entry/syscalls/syscall_32.tbl | 726 ++++++++++++++++-----------------
arch/x86/entry/syscalls/syscall_64.tbl | 708 ++++++++++++++++----------------
arch/x86/entry/syscalls/syscalltbl.sh | 7 +-
3 files changed, 723 insertions(+), 718 deletions(-)
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 448ac2161112..c3a4480365dd 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -6,388 +6,388 @@
#
# The abi is always "i386" for this file.
#
-0 i386 restart_syscall sys_restart_syscall
-1 i386 exit sys_exit
-2 i386 fork sys_fork sys_fork
-3 i386 read sys_read
-4 i386 write sys_write
-5 i386 open sys_open compat_sys_open
-6 i386 close sys_close
-7 i386 waitpid sys_waitpid sys32_waitpid
-8 i386 creat sys_creat
-9 i386 link sys_link
-10 i386 unlink sys_unlink
-11 i386 execve sys_execve compat_sys_execve
-12 i386 chdir sys_chdir
-13 i386 time sys_time compat_sys_time
-14 i386 mknod sys_mknod
-15 i386 chmod sys_chmod
-16 i386 lchown sys_lchown16
+0 i386 restart_syscall sys_restart_syscall 0
+1 i386 exit sys_exit 1
+2 i386 fork sys_fork sys_fork 0
+3 i386 read sys_read 3
+4 i386 write sys_write 3
+5 i386 open sys_open compat_sys_open 3
+6 i386 close sys_close 1
+7 i386 waitpid sys_waitpid sys32_waitpid 3
+8 i386 creat sys_creat 2
+9 i386 link sys_link 2
+10 i386 unlink sys_unlink 1
+11 i386 execve sys_execve compat_sys_execve 3
+12 i386 chdir sys_chdir 1
+13 i386 time sys_time compat_sys_time 1
+14 i386 mknod sys_mknod 3
+15 i386 chmod sys_chmod 2
+16 i386 lchown sys_lchown16 3
17 i386 break
-18 i386 oldstat sys_stat
-19 i386 lseek sys_lseek compat_sys_lseek
-20 i386 getpid sys_getpid
-21 i386 mount sys_mount compat_sys_mount
-22 i386 umount sys_oldumount
-23 i386 setuid sys_setuid16
-24 i386 getuid sys_getuid16
-25 i386 stime sys_stime compat_sys_stime
-26 i386 ptrace sys_ptrace compat_sys_ptrace
-27 i386 alarm sys_alarm
-28 i386 oldfstat sys_fstat
-29 i386 pause sys_pause
-30 i386 utime sys_utime compat_sys_utime
+18 i386 oldstat sys_stat 2
+19 i386 lseek sys_lseek compat_sys_lseek 3
+20 i386 getpid sys_getpid 0
+21 i386 mount sys_mount compat_sys_mount 5
+22 i386 umount sys_oldumount 2
+23 i386 setuid sys_setuid16 1
+24 i386 getuid sys_getuid16 0
+25 i386 stime sys_stime compat_sys_stime 1
+26 i386 ptrace sys_ptrace compat_sys_ptrace 4
+27 i386 alarm sys_alarm 1
+28 i386 oldfstat sys_fstat 2
+29 i386 pause sys_pause 0
+30 i386 utime sys_utime compat_sys_utime 2
31 i386 stty
32 i386 gtty
-33 i386 access sys_access
-34 i386 nice sys_nice
+33 i386 access sys_access 2
+34 i386 nice sys_nice 1
35 i386 ftime
-36 i386 sync sys_sync
-37 i386 kill sys_kill
-38 i386 rename sys_rename
-39 i386 mkdir sys_mkdir
-40 i386 rmdir sys_rmdir
-41 i386 dup sys_dup
-42 i386 pipe sys_pipe
-43 i386 times sys_times compat_sys_times
+36 i386 sync sys_sync 0
+37 i386 kill sys_kill 2
+38 i386 rename sys_rename 2
+39 i386 mkdir sys_mkdir 2
+40 i386 rmdir sys_rmdir 1
+41 i386 dup sys_dup 1
+42 i386 pipe sys_pipe 1
+43 i386 times sys_times compat_sys_times 1
44 i386 prof
-45 i386 brk sys_brk
-46 i386 setgid sys_setgid16
-47 i386 getgid sys_getgid16
-48 i386 signal sys_signal
-49 i386 geteuid sys_geteuid16
-50 i386 getegid sys_getegid16
-51 i386 acct sys_acct
-52 i386 umount2 sys_umount
+45 i386 brk sys_brk 1
+46 i386 setgid sys_setgid16 1
+47 i386 getgid sys_getgid16 0
+48 i386 signal sys_signal 2
+49 i386 geteuid sys_geteuid16 0
+50 i386 getegid sys_getegid16 0
+51 i386 acct sys_acct 1
+52 i386 umount2 sys_umount 2
53 i386 lock
-54 i386 ioctl sys_ioctl compat_sys_ioctl
-55 i386 fcntl sys_fcntl compat_sys_fcntl64
+54 i386 ioctl sys_ioctl compat_sys_ioctl 3
+55 i386 fcntl sys_fcntl compat_sys_fcntl64 3
56 i386 mpx
-57 i386 setpgid sys_setpgid
+57 i386 setpgid sys_setpgid 2
58 i386 ulimit
-59 i386 oldolduname sys_olduname
-60 i386 umask sys_umask
-61 i386 chroot sys_chroot
-62 i386 ustat sys_ustat compat_sys_ustat
-63 i386 dup2 sys_dup2
-64 i386 getppid sys_getppid
-65 i386 getpgrp sys_getpgrp
-66 i386 setsid sys_setsid
-67 i386 sigaction sys_sigaction compat_sys_sigaction
-68 i386 sgetmask sys_sgetmask
-69 i386 ssetmask sys_ssetmask
-70 i386 setreuid sys_setreuid16
-71 i386 setregid sys_setregid16
-72 i386 sigsuspend sys_sigsuspend sys_sigsuspend
-73 i386 sigpending sys_sigpending compat_sys_sigpending
-74 i386 sethostname sys_sethostname
-75 i386 setrlimit sys_setrlimit compat_sys_setrlimit
-76 i386 getrlimit sys_old_getrlimit compat_sys_old_getrlimit
-77 i386 getrusage sys_getrusage compat_sys_getrusage
-78 i386 gettimeofday sys_gettimeofday compat_sys_gettimeofday
-79 i386 settimeofday sys_settimeofday compat_sys_settimeofday
-80 i386 getgroups sys_getgroups16
-81 i386 setgroups sys_setgroups16
-82 i386 select sys_old_select compat_sys_old_select
-83 i386 symlink sys_symlink
-84 i386 oldlstat sys_lstat
-85 i386 readlink sys_readlink
-86 i386 uselib sys_uselib
-87 i386 swapon sys_swapon
-88 i386 reboot sys_reboot
-89 i386 readdir sys_old_readdir compat_sys_old_readdir
-90 i386 mmap sys_old_mmap sys32_mmap
-91 i386 munmap sys_munmap
-92 i386 truncate sys_truncate compat_sys_truncate
-93 i386 ftruncate sys_ftruncate compat_sys_ftruncate
-94 i386 fchmod sys_fchmod
-95 i386 fchown sys_fchown16
-96 i386 getpriority sys_getpriority
-97 i386 setpriority sys_setpriority
+59 i386 oldolduname sys_olduname 1
+60 i386 umask sys_umask 1
+61 i386 chroot sys_chroot 1
+62 i386 ustat sys_ustat compat_sys_ustat 2
+63 i386 dup2 sys_dup2 2
+64 i386 getppid sys_getppid 0
+65 i386 getpgrp sys_getpgrp 0
+66 i386 setsid sys_setsid 0
+67 i386 sigaction sys_sigaction compat_sys_sigaction 3
+68 i386 sgetmask sys_sgetmask 0
+69 i386 ssetmask sys_ssetmask 1
+70 i386 setreuid sys_setreuid16 2
+71 i386 setregid sys_setregid16 2
+72 i386 sigsuspend sys_sigsuspend sys_sigsuspend 3
+73 i386 sigpending sys_sigpending compat_sys_sigpending 1
+74 i386 sethostname sys_sethostname 2
+75 i386 setrlimit sys_setrlimit compat_sys_setrlimit 2
+76 i386 getrlimit sys_old_getrlimit compat_sys_old_getrlimit 2
+77 i386 getrusage sys_getrusage compat_sys_getrusage 2
+78 i386 gettimeofday sys_gettimeofday compat_sys_gettimeofday 2
+79 i386 settimeofday sys_settimeofday compat_sys_settimeofday 2
+80 i386 getgroups sys_getgroups16 2
+81 i386 setgroups sys_setgroups16 2
+82 i386 select sys_old_select compat_sys_old_select 5
+83 i386 symlink sys_symlink 2
+84 i386 oldlstat sys_lstat 2
+85 i386 readlink sys_readlink 3
+86 i386 uselib sys_uselib 1
+87 i386 swapon sys_swapon 2
+88 i386 reboot sys_reboot 4
+89 i386 readdir sys_old_readdir compat_sys_old_readdir 3
+90 i386 mmap sys_old_mmap sys32_mmap 6
+91 i386 munmap sys_munmap 2
+92 i386 truncate sys_truncate compat_sys_truncate 2
+93 i386 ftruncate sys_ftruncate compat_sys_ftruncate 2
+94 i386 fchmod sys_fchmod 2
+95 i386 fchown sys_fchown16 3
+96 i386 getpriority sys_getpriority 2
+97 i386 setpriority sys_setpriority 3
98 i386 profil
-99 i386 statfs sys_statfs compat_sys_statfs
-100 i386 fstatfs sys_fstatfs compat_sys_fstatfs
-101 i386 ioperm sys_ioperm
-102 i386 socketcall sys_socketcall compat_sys_socketcall
-103 i386 syslog sys_syslog
-104 i386 setitimer sys_setitimer compat_sys_setitimer
-105 i386 getitimer sys_getitimer compat_sys_getitimer
-106 i386 stat sys_newstat compat_sys_newstat
-107 i386 lstat sys_newlstat compat_sys_newlstat
-108 i386 fstat sys_newfstat compat_sys_newfstat
-109 i386 olduname sys_uname
-110 i386 iopl sys_iopl
-111 i386 vhangup sys_vhangup
+99 i386 statfs sys_statfs compat_sys_statfs 2
+100 i386 fstatfs sys_fstatfs compat_sys_fstatfs 2
+101 i386 ioperm sys_ioperm 3
+102 i386 socketcall sys_socketcall compat_sys_socketcall 2
+103 i386 syslog sys_syslog 3
+104 i386 setitimer sys_setitimer compat_sys_setitimer 3
+105 i386 getitimer sys_getitimer compat_sys_getitimer 2
+106 i386 stat sys_newstat compat_sys_newstat 2
+107 i386 lstat sys_newlstat compat_sys_newlstat 2
+108 i386 fstat sys_newfstat compat_sys_newfstat 2
+109 i386 olduname sys_uname 1
+110 i386 iopl sys_iopl 1
+111 i386 vhangup sys_vhangup 0
112 i386 idle
-113 i386 vm86old sys_vm86old sys_ni_syscall
-114 i386 wait4 sys_wait4 compat_sys_wait4
-115 i386 swapoff sys_swapoff
-116 i386 sysinfo sys_sysinfo compat_sys_sysinfo
-117 i386 ipc sys_ipc compat_sys_ipc
-118 i386 fsync sys_fsync
-119 i386 sigreturn sys_sigreturn sys32_sigreturn
-120 i386 clone sys_clone stub32_clone
-121 i386 setdomainname sys_setdomainname
-122 i386 uname sys_newuname
-123 i386 modify_ldt sys_modify_ldt
-124 i386 adjtimex sys_adjtimex compat_sys_adjtimex
-125 i386 mprotect sys_mprotect
-126 i386 sigprocmask sys_sigprocmask compat_sys_sigprocmask
+113 i386 vm86old sys_vm86old sys_ni_syscall 1
+114 i386 wait4 sys_wait4 compat_sys_wait4 4
+115 i386 swapoff sys_swapoff 1
+116 i386 sysinfo sys_sysinfo compat_sys_sysinfo 1
+117 i386 ipc sys_ipc compat_sys_ipc 6
+118 i386 fsync sys_fsync 1
+119 i386 sigreturn sys_sigreturn sys32_sigreturn 0
+120 i386 clone sys_clone stub32_clone 6
+121 i386 setdomainname sys_setdomainname 2
+122 i386 uname sys_newuname 1
+123 i386 modify_ldt sys_modify_ldt 3
+124 i386 adjtimex sys_adjtimex compat_sys_adjtimex 1
+125 i386 mprotect sys_mprotect 3
+126 i386 sigprocmask sys_sigprocmask compat_sys_sigprocmask 3
127 i386 create_module
-128 i386 init_module sys_init_module
-129 i386 delete_module sys_delete_module
+128 i386 init_module sys_init_module 3
+129 i386 delete_module sys_delete_module 2
130 i386 get_kernel_syms
-131 i386 quotactl sys_quotactl sys32_quotactl
-132 i386 getpgid sys_getpgid
-133 i386 fchdir sys_fchdir
-134 i386 bdflush sys_bdflush
-135 i386 sysfs sys_sysfs
-136 i386 personality sys_personality
+131 i386 quotactl sys_quotactl sys32_quotactl 4
+132 i386 getpgid sys_getpgid 1
+133 i386 fchdir sys_fchdir 1
+134 i386 bdflush sys_bdflush 2
+135 i386 sysfs sys_sysfs 3
+136 i386 personality sys_personality 1
137 i386 afs_syscall
-138 i386 setfsuid sys_setfsuid16
-139 i386 setfsgid sys_setfsgid16
-140 i386 _llseek sys_llseek
-141 i386 getdents sys_getdents compat_sys_getdents
-142 i386 _newselect sys_select compat_sys_select
-143 i386 flock sys_flock
-144 i386 msync sys_msync
-145 i386 readv sys_readv compat_sys_readv
-146 i386 writev sys_writev compat_sys_writev
-147 i386 getsid sys_getsid
-148 i386 fdatasync sys_fdatasync
-149 i386 _sysctl sys_sysctl compat_sys_sysctl
-150 i386 mlock sys_mlock
-151 i386 munlock sys_munlock
-152 i386 mlockall sys_mlockall
-153 i386 munlockall sys_munlockall
-154 i386 sched_setparam sys_sched_setparam
-155 i386 sched_getparam sys_sched_getparam
-156 i386 sched_setscheduler sys_sched_setscheduler
-157 i386 sched_getscheduler sys_sched_getscheduler
-158 i386 sched_yield sys_sched_yield
-159 i386 sched_get_priority_max sys_sched_get_priority_max
-160 i386 sched_get_priority_min sys_sched_get_priority_min
-161 i386 sched_rr_get_interval sys_sched_rr_get_interval compat_sys_sched_rr_get_interval
-162 i386 nanosleep sys_nanosleep compat_sys_nanosleep
-163 i386 mremap sys_mremap
-164 i386 setresuid sys_setresuid16
-165 i386 getresuid sys_getresuid16
-166 i386 vm86 sys_vm86 sys_ni_syscall
+138 i386 setfsuid sys_setfsuid16 1
+139 i386 setfsgid sys_setfsgid16 1
+140 i386 _llseek sys_llseek 5
+141 i386 getdents sys_getdents compat_sys_getdents 3
+142 i386 _newselect sys_select compat_sys_select 5
+143 i386 flock sys_flock 2
+144 i386 msync sys_msync 3
+145 i386 readv sys_readv compat_sys_readv 3
+146 i386 writev sys_writev compat_sys_writev 3
+147 i386 getsid sys_getsid 1
+148 i386 fdatasync sys_fdatasync 1
+149 i386 _sysctl sys_sysctl compat_sys_sysctl 1
+150 i386 mlock sys_mlock 2
+151 i386 munlock sys_munlock 2
+152 i386 mlockall sys_mlockall 1
+153 i386 munlockall sys_munlockall 0
+154 i386 sched_setparam sys_sched_setparam 2
+155 i386 sched_getparam sys_sched_getparam 2
+156 i386 sched_setscheduler sys_sched_setscheduler 3
+157 i386 sched_getscheduler sys_sched_getscheduler 1
+158 i386 sched_yield sys_sched_yield 0
+159 i386 sched_get_priority_max sys_sched_get_priority_max 1
+160 i386 sched_get_priority_min sys_sched_get_priority_min 1
+161 i386 sched_rr_get_interval sys_sched_rr_get_interval compat_sys_sched_rr_get_interval 2
+162 i386 nanosleep sys_nanosleep compat_sys_nanosleep 2
+163 i386 mremap sys_mremap 5
+164 i386 setresuid sys_setresuid16 3
+165 i386 getresuid sys_getresuid16 3
+166 i386 vm86 sys_vm86 sys_ni_syscall 2
167 i386 query_module
-168 i386 poll sys_poll
+168 i386 poll sys_poll 3
169 i386 nfsservctl
-170 i386 setresgid sys_setresgid16
-171 i386 getresgid sys_getresgid16
-172 i386 prctl sys_prctl
-173 i386 rt_sigreturn sys_rt_sigreturn sys32_rt_sigreturn
-174 i386 rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction
-175 i386 rt_sigprocmask sys_rt_sigprocmask
-176 i386 rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending
-177 i386 rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait
-178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo
-179 i386 rt_sigsuspend sys_rt_sigsuspend
-180 i386 pread64 sys_pread64 sys32_pread
-181 i386 pwrite64 sys_pwrite64 sys32_pwrite
-182 i386 chown sys_chown16
-183 i386 getcwd sys_getcwd
-184 i386 capget sys_capget
-185 i386 capset sys_capset
-186 i386 sigaltstack sys_sigaltstack compat_sys_sigaltstack
-187 i386 sendfile sys_sendfile compat_sys_sendfile
+170 i386 setresgid sys_setresgid16 3
+171 i386 getresgid sys_getresgid16 3
+172 i386 prctl sys_prctl 5
+173 i386 rt_sigreturn sys_rt_sigreturn sys32_rt_sigreturn 0
+174 i386 rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction 5
+175 i386 rt_sigprocmask sys_rt_sigprocmask 4
+176 i386 rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending 2
+177 i386 rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait 4
+178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo 3
+179 i386 rt_sigsuspend sys_rt_sigsuspend 2
+180 i386 pread64 sys_pread64 sys32_pread 4
+181 i386 pwrite64 sys_pwrite64 sys32_pwrite 4
+182 i386 chown sys_chown16 3
+183 i386 getcwd sys_getcwd 2
+184 i386 capget sys_capget 2
+185 i386 capset sys_capset 2
+186 i386 sigaltstack sys_sigaltstack compat_sys_sigaltstack 2
+187 i386 sendfile sys_sendfile compat_sys_sendfile 4
188 i386 getpmsg
189 i386 putpmsg
-190 i386 vfork sys_vfork sys_vfork
-191 i386 ugetrlimit sys_getrlimit compat_sys_getrlimit
-192 i386 mmap2 sys_mmap_pgoff
-193 i386 truncate64 sys_truncate64 sys32_truncate64
-194 i386 ftruncate64 sys_ftruncate64 sys32_ftruncate64
-195 i386 stat64 sys_stat64 sys32_stat64
-196 i386 lstat64 sys_lstat64 sys32_lstat64
-197 i386 fstat64 sys_fstat64 sys32_fstat64
-198 i386 lchown32 sys_lchown
-199 i386 getuid32 sys_getuid
-200 i386 getgid32 sys_getgid
-201 i386 geteuid32 sys_geteuid
-202 i386 getegid32 sys_getegid
-203 i386 setreuid32 sys_setreuid
-204 i386 setregid32 sys_setregid
-205 i386 getgroups32 sys_getgroups
-206 i386 setgroups32 sys_setgroups
-207 i386 fchown32 sys_fchown
-208 i386 setresuid32 sys_setresuid
-209 i386 getresuid32 sys_getresuid
-210 i386 setresgid32 sys_setresgid
-211 i386 getresgid32 sys_getresgid
-212 i386 chown32 sys_chown
-213 i386 setuid32 sys_setuid
-214 i386 setgid32 sys_setgid
-215 i386 setfsuid32 sys_setfsuid
-216 i386 setfsgid32 sys_setfsgid
-217 i386 pivot_root sys_pivot_root
-218 i386 mincore sys_mincore
-219 i386 madvise sys_madvise
-220 i386 getdents64 sys_getdents64
-221 i386 fcntl64 sys_fcntl64 compat_sys_fcntl64
+190 i386 vfork sys_vfork sys_vfork 0
+191 i386 ugetrlimit sys_getrlimit compat_sys_getrlimit 2
+192 i386 mmap2 sys_mmap_pgoff 6
+193 i386 truncate64 sys_truncate64 sys32_truncate64 2
+194 i386 ftruncate64 sys_ftruncate64 sys32_ftruncate64 2
+195 i386 stat64 sys_stat64 sys32_stat64 2
+196 i386 lstat64 sys_lstat64 sys32_lstat64 2
+197 i386 fstat64 sys_fstat64 sys32_fstat64 2
+198 i386 lchown32 sys_lchown 3
+199 i386 getuid32 sys_getuid 0
+200 i386 getgid32 sys_getgid 0
+201 i386 geteuid32 sys_geteuid 0
+202 i386 getegid32 sys_getegid 0
+203 i386 setreuid32 sys_setreuid 2
+204 i386 setregid32 sys_setregid 2
+205 i386 getgroups32 sys_getgroups 2
+206 i386 setgroups32 sys_setgroups 2
+207 i386 fchown32 sys_fchown 3
+208 i386 setresuid32 sys_setresuid 3
+209 i386 getresuid32 sys_getresuid 3
+210 i386 setresgid32 sys_setresgid 3
+211 i386 getresgid32 sys_getresgid 3
+212 i386 chown32 sys_chown 3
+213 i386 setuid32 sys_setuid 1
+214 i386 setgid32 sys_setgid 1
+215 i386 setfsuid32 sys_setfsuid 1
+216 i386 setfsgid32 sys_setfsgid 1
+217 i386 pivot_root sys_pivot_root 2
+218 i386 mincore sys_mincore 3
+219 i386 madvise sys_madvise 3
+220 i386 getdents64 sys_getdents64 3
+221 i386 fcntl64 sys_fcntl64 compat_sys_fcntl64 3
# 222 is unused
# 223 is unused
-224 i386 gettid sys_gettid
-225 i386 readahead sys_readahead sys32_readahead
-226 i386 setxattr sys_setxattr
-227 i386 lsetxattr sys_lsetxattr
-228 i386 fsetxattr sys_fsetxattr
-229 i386 getxattr sys_getxattr
-230 i386 lgetxattr sys_lgetxattr
-231 i386 fgetxattr sys_fgetxattr
-232 i386 listxattr sys_listxattr
-233 i386 llistxattr sys_llistxattr
-234 i386 flistxattr sys_flistxattr
-235 i386 removexattr sys_removexattr
-236 i386 lremovexattr sys_lremovexattr
-237 i386 fremovexattr sys_fremovexattr
-238 i386 tkill sys_tkill
-239 i386 sendfile64 sys_sendfile64
-240 i386 futex sys_futex compat_sys_futex
-241 i386 sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity
-242 i386 sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity
-243 i386 set_thread_area sys_set_thread_area
-244 i386 get_thread_area sys_get_thread_area
-245 i386 io_setup sys_io_setup compat_sys_io_setup
-246 i386 io_destroy sys_io_destroy
-247 i386 io_getevents sys_io_getevents compat_sys_io_getevents
-248 i386 io_submit sys_io_submit compat_sys_io_submit
-249 i386 io_cancel sys_io_cancel
-250 i386 fadvise64 sys_fadvise64 sys32_fadvise64
+224 i386 gettid sys_gettid 0
+225 i386 readahead sys_readahead sys32_readahead 3
+226 i386 setxattr sys_setxattr 5
+227 i386 lsetxattr sys_lsetxattr 5
+228 i386 fsetxattr sys_fsetxattr 5
+229 i386 getxattr sys_getxattr 4
+230 i386 lgetxattr sys_lgetxattr 4
+231 i386 fgetxattr sys_fgetxattr 4
+232 i386 listxattr sys_listxattr 3
+233 i386 llistxattr sys_llistxattr 3
+234 i386 flistxattr sys_flistxattr 3
+235 i386 removexattr sys_removexattr 2
+236 i386 lremovexattr sys_lremovexattr 2
+237 i386 fremovexattr sys_fremovexattr 2
+238 i386 tkill sys_tkill 2
+239 i386 sendfile64 sys_sendfile64 4
+240 i386 futex sys_futex compat_sys_futex 6
+241 i386 sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity 3
+242 i386 sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity 3
+243 i386 set_thread_area sys_set_thread_area 1
+244 i386 get_thread_area sys_get_thread_area 1
+245 i386 io_setup sys_io_setup compat_sys_io_setup 2
+246 i386 io_destroy sys_io_destroy 1
+247 i386 io_getevents sys_io_getevents compat_sys_io_getevents 5
+248 i386 io_submit sys_io_submit compat_sys_io_submit 3
+249 i386 io_cancel sys_io_cancel 3
+250 i386 fadvise64 sys_fadvise64 sys32_fadvise64 4
# 251 is available for reuse (was briefly sys_set_zone_reclaim)
-252 i386 exit_group sys_exit_group
-253 i386 lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie
-254 i386 epoll_create sys_epoll_create
-255 i386 epoll_ctl sys_epoll_ctl
-256 i386 epoll_wait sys_epoll_wait
-257 i386 remap_file_pages sys_remap_file_pages
-258 i386 set_tid_address sys_set_tid_address
-259 i386 timer_create sys_timer_create compat_sys_timer_create
-260 i386 timer_settime sys_timer_settime compat_sys_timer_settime
-261 i386 timer_gettime sys_timer_gettime compat_sys_timer_gettime
-262 i386 timer_getoverrun sys_timer_getoverrun
-263 i386 timer_delete sys_timer_delete
-264 i386 clock_settime sys_clock_settime compat_sys_clock_settime
-265 i386 clock_gettime sys_clock_gettime compat_sys_clock_gettime
-266 i386 clock_getres sys_clock_getres compat_sys_clock_getres
-267 i386 clock_nanosleep sys_clock_nanosleep compat_sys_clock_nanosleep
-268 i386 statfs64 sys_statfs64 compat_sys_statfs64
-269 i386 fstatfs64 sys_fstatfs64 compat_sys_fstatfs64
-270 i386 tgkill sys_tgkill
-271 i386 utimes sys_utimes compat_sys_utimes
-272 i386 fadvise64_64 sys_fadvise64_64 sys32_fadvise64_64
+252 i386 exit_group sys_exit_group 1
+253 i386 lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie 3
+254 i386 epoll_create sys_epoll_create 1
+255 i386 epoll_ctl sys_epoll_ctl 4
+256 i386 epoll_wait sys_epoll_wait 4
+257 i386 remap_file_pages sys_remap_file_pages 5
+258 i386 set_tid_address sys_set_tid_address 1
+259 i386 timer_create sys_timer_create compat_sys_timer_create 3
+260 i386 timer_settime sys_timer_settime compat_sys_timer_settime 4
+261 i386 timer_gettime sys_timer_gettime compat_sys_timer_gettime 2
+262 i386 timer_getoverrun sys_timer_getoverrun 1
+263 i386 timer_delete sys_timer_delete 1
+264 i386 clock_settime sys_clock_settime compat_sys_clock_settime 2
+265 i386 clock_gettime sys_clock_gettime compat_sys_clock_gettime 2
+266 i386 clock_getres sys_clock_getres compat_sys_clock_getres 2
+267 i386 clock_nanosleep sys_clock_nanosleep compat_sys_clock_nanosleep 4
+268 i386 statfs64 sys_statfs64 compat_sys_statfs64 3
+269 i386 fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 3
+270 i386 tgkill sys_tgkill 3
+271 i386 utimes sys_utimes compat_sys_utimes 2
+272 i386 fadvise64_64 sys_fadvise64_64 sys32_fadvise64_64 4
273 i386 vserver
-274 i386 mbind sys_mbind
-275 i386 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
-276 i386 set_mempolicy sys_set_mempolicy
-277 i386 mq_open sys_mq_open compat_sys_mq_open
-278 i386 mq_unlink sys_mq_unlink
-279 i386 mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend
-280 i386 mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive
-281 i386 mq_notify sys_mq_notify compat_sys_mq_notify
-282 i386 mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr
-283 i386 kexec_load sys_kexec_load compat_sys_kexec_load
-284 i386 waitid sys_waitid compat_sys_waitid
+274 i386 mbind sys_mbind 6
+275 i386 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy 5
+276 i386 set_mempolicy sys_set_mempolicy 3
+277 i386 mq_open sys_mq_open compat_sys_mq_open 4
+278 i386 mq_unlink sys_mq_unlink 1
+279 i386 mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend 5
+280 i386 mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive 5
+281 i386 mq_notify sys_mq_notify compat_sys_mq_notify 2
+282 i386 mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr 3
+283 i386 kexec_load sys_kexec_load compat_sys_kexec_load 4
+284 i386 waitid sys_waitid compat_sys_waitid 5
# 285 sys_setaltroot
-286 i386 add_key sys_add_key
-287 i386 request_key sys_request_key
-288 i386 keyctl sys_keyctl compat_sys_keyctl
-289 i386 ioprio_set sys_ioprio_set
-290 i386 ioprio_get sys_ioprio_get
-291 i386 inotify_init sys_inotify_init
-292 i386 inotify_add_watch sys_inotify_add_watch
-293 i386 inotify_rm_watch sys_inotify_rm_watch
-294 i386 migrate_pages sys_migrate_pages
-295 i386 openat sys_openat compat_sys_openat
-296 i386 mkdirat sys_mkdirat
-297 i386 mknodat sys_mknodat
-298 i386 fchownat sys_fchownat
-299 i386 futimesat sys_futimesat compat_sys_futimesat
-300 i386 fstatat64 sys_fstatat64 sys32_fstatat
-301 i386 unlinkat sys_unlinkat
-302 i386 renameat sys_renameat
-303 i386 linkat sys_linkat
-304 i386 symlinkat sys_symlinkat
-305 i386 readlinkat sys_readlinkat
-306 i386 fchmodat sys_fchmodat
-307 i386 faccessat sys_faccessat
-308 i386 pselect6 sys_pselect6 compat_sys_pselect6
-309 i386 ppoll sys_ppoll compat_sys_ppoll
-310 i386 unshare sys_unshare
-311 i386 set_robust_list sys_set_robust_list compat_sys_set_robust_list
-312 i386 get_robust_list sys_get_robust_list compat_sys_get_robust_list
-313 i386 splice sys_splice
-314 i386 sync_file_range sys_sync_file_range sys32_sync_file_range
-315 i386 tee sys_tee
-316 i386 vmsplice sys_vmsplice compat_sys_vmsplice
-317 i386 move_pages sys_move_pages compat_sys_move_pages
-318 i386 getcpu sys_getcpu
-319 i386 epoll_pwait sys_epoll_pwait
-320 i386 utimensat sys_utimensat compat_sys_utimensat
-321 i386 signalfd sys_signalfd compat_sys_signalfd
-322 i386 timerfd_create sys_timerfd_create
-323 i386 eventfd sys_eventfd
-324 i386 fallocate sys_fallocate sys32_fallocate
-325 i386 timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime
-326 i386 timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime
-327 i386 signalfd4 sys_signalfd4 compat_sys_signalfd4
-328 i386 eventfd2 sys_eventfd2
-329 i386 epoll_create1 sys_epoll_create1
-330 i386 dup3 sys_dup3
-331 i386 pipe2 sys_pipe2
-332 i386 inotify_init1 sys_inotify_init1
-333 i386 preadv sys_preadv compat_sys_preadv
-334 i386 pwritev sys_pwritev compat_sys_pwritev
-335 i386 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo
-336 i386 perf_event_open sys_perf_event_open
-337 i386 recvmmsg sys_recvmmsg compat_sys_recvmmsg
-338 i386 fanotify_init sys_fanotify_init
-339 i386 fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark
-340 i386 prlimit64 sys_prlimit64
-341 i386 name_to_handle_at sys_name_to_handle_at
-342 i386 open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at
-343 i386 clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime
-344 i386 syncfs sys_syncfs
-345 i386 sendmmsg sys_sendmmsg compat_sys_sendmmsg
-346 i386 setns sys_setns
-347 i386 process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv
-348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
-349 i386 kcmp sys_kcmp
-350 i386 finit_module sys_finit_module
-351 i386 sched_setattr sys_sched_setattr
-352 i386 sched_getattr sys_sched_getattr
-353 i386 renameat2 sys_renameat2
-354 i386 seccomp sys_seccomp
-355 i386 getrandom sys_getrandom
-356 i386 memfd_create sys_memfd_create
-357 i386 bpf sys_bpf
-358 i386 execveat sys_execveat compat_sys_execveat
-359 i386 socket sys_socket
-360 i386 socketpair sys_socketpair
-361 i386 bind sys_bind
-362 i386 connect sys_connect
-363 i386 listen sys_listen
-364 i386 accept4 sys_accept4
-365 i386 getsockopt sys_getsockopt compat_sys_getsockopt
-366 i386 setsockopt sys_setsockopt compat_sys_setsockopt
-367 i386 getsockname sys_getsockname
-368 i386 getpeername sys_getpeername
-369 i386 sendto sys_sendto
-370 i386 sendmsg sys_sendmsg compat_sys_sendmsg
-371 i386 recvfrom sys_recvfrom compat_sys_recvfrom
-372 i386 recvmsg sys_recvmsg compat_sys_recvmsg
-373 i386 shutdown sys_shutdown
-374 i386 userfaultfd sys_userfaultfd
-375 i386 membarrier sys_membarrier
-376 i386 mlock2 sys_mlock2
-377 i386 copy_file_range sys_copy_file_range
-378 i386 preadv2 sys_preadv2 compat_sys_preadv2
-379 i386 pwritev2 sys_pwritev2 compat_sys_pwritev2
-380 i386 pkey_mprotect sys_pkey_mprotect
-381 i386 pkey_alloc sys_pkey_alloc
-382 i386 pkey_free sys_pkey_free
-383 i386 statx sys_statx
-384 i386 arch_prctl sys_arch_prctl compat_sys_arch_prctl
+286 i386 add_key sys_add_key 5
+287 i386 request_key sys_request_key 4
+288 i386 keyctl sys_keyctl compat_sys_keyctl 5
+289 i386 ioprio_set sys_ioprio_set 3
+290 i386 ioprio_get sys_ioprio_get 2
+291 i386 inotify_init sys_inotify_init 0
+292 i386 inotify_add_watch sys_inotify_add_watch 3
+293 i386 inotify_rm_watch sys_inotify_rm_watch 2
+294 i386 migrate_pages sys_migrate_pages 4
+295 i386 openat sys_openat compat_sys_openat 4
+296 i386 mkdirat sys_mkdirat 3
+297 i386 mknodat sys_mknodat 4
+298 i386 fchownat sys_fchownat 5
+299 i386 futimesat sys_futimesat compat_sys_futimesat 3
+300 i386 fstatat64 sys_fstatat64 sys32_fstatat 4
+301 i386 unlinkat sys_unlinkat 3
+302 i386 renameat sys_renameat 4
+303 i386 linkat sys_linkat 5
+304 i386 symlinkat sys_symlinkat 3
+305 i386 readlinkat sys_readlinkat 4
+306 i386 fchmodat sys_fchmodat 3
+307 i386 faccessat sys_faccessat 3
+308 i386 pselect6 sys_pselect6 compat_sys_pselect6 6
+309 i386 ppoll sys_ppoll compat_sys_ppoll 5
+310 i386 unshare sys_unshare 1
+311 i386 set_robust_list sys_set_robust_list compat_sys_set_robust_list 2
+312 i386 get_robust_list sys_get_robust_list compat_sys_get_robust_list 3
+313 i386 splice sys_splice 6
+314 i386 sync_file_range sys_sync_file_range sys32_sync_file_range 4
+315 i386 tee sys_tee 4
+316 i386 vmsplice sys_vmsplice compat_sys_vmsplice 4
+317 i386 move_pages sys_move_pages compat_sys_move_pages 6
+318 i386 getcpu sys_getcpu 3
+319 i386 epoll_pwait sys_epoll_pwait 6
+320 i386 utimensat sys_utimensat compat_sys_utimensat 4
+321 i386 signalfd sys_signalfd compat_sys_signalfd 3
+322 i386 timerfd_create sys_timerfd_create 2
+323 i386 eventfd sys_eventfd 1
+324 i386 fallocate sys_fallocate sys32_fallocate 4
+325 i386 timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime 4
+326 i386 timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime 2
+327 i386 signalfd4 sys_signalfd4 compat_sys_signalfd4 4
+328 i386 eventfd2 sys_eventfd2 2
+329 i386 epoll_create1 sys_epoll_create1 1
+330 i386 dup3 sys_dup3 3
+331 i386 pipe2 sys_pipe2 2
+332 i386 inotify_init1 sys_inotify_init1 1
+333 i386 preadv sys_preadv compat_sys_preadv 5
+334 i386 pwritev sys_pwritev compat_sys_pwritev 5
+335 i386 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo 4
+336 i386 perf_event_open sys_perf_event_open 5
+337 i386 recvmmsg sys_recvmmsg compat_sys_recvmmsg 5
+338 i386 fanotify_init sys_fanotify_init 2
+339 i386 fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark 5
+340 i386 prlimit64 sys_prlimit64 4
+341 i386 name_to_handle_at sys_name_to_handle_at 5
+342 i386 open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at 3
+343 i386 clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime 2
+344 i386 syncfs sys_syncfs 1
+345 i386 sendmmsg sys_sendmmsg compat_sys_sendmmsg 4
+346 i386 setns sys_setns 2
+347 i386 process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv 6
+348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev 6
+349 i386 kcmp sys_kcmp 5
+350 i386 finit_module sys_finit_module 3
+351 i386 sched_setattr sys_sched_setattr 3
+352 i386 sched_getattr sys_sched_getattr 4
+353 i386 renameat2 sys_renameat2 5
+354 i386 seccomp sys_seccomp 3
+355 i386 getrandom sys_getrandom 3
+356 i386 memfd_create sys_memfd_create 2
+357 i386 bpf sys_bpf 3
+358 i386 execveat sys_execveat compat_sys_execveat 5
+359 i386 socket sys_socket 3
+360 i386 socketpair sys_socketpair 4
+361 i386 bind sys_bind 3
+362 i386 connect sys_connect 3
+363 i386 listen sys_listen 2
+364 i386 accept4 sys_accept4 4
+365 i386 getsockopt sys_getsockopt compat_sys_getsockopt 5
+366 i386 setsockopt sys_setsockopt compat_sys_setsockopt 5
+367 i386 getsockname sys_getsockname 3
+368 i386 getpeername sys_getpeername 3
+369 i386 sendto sys_sendto 6
+370 i386 sendmsg sys_sendmsg compat_sys_sendmsg 3
+371 i386 recvfrom sys_recvfrom compat_sys_recvfrom 6
+372 i386 recvmsg sys_recvmsg compat_sys_recvmsg 3
+373 i386 shutdown sys_shutdown 2
+374 i386 userfaultfd sys_userfaultfd 1
+375 i386 membarrier sys_membarrier 2
+376 i386 mlock2 sys_mlock2 3
+377 i386 copy_file_range sys_copy_file_range 6
+378 i386 preadv2 sys_preadv2 compat_sys_preadv2 6
+379 i386 pwritev2 sys_pwritev2 compat_sys_pwritev2 6
+380 i386 pkey_mprotect sys_pkey_mprotect 4
+381 i386 pkey_alloc sys_pkey_alloc 2
+382 i386 pkey_free sys_pkey_free 1
+383 i386 statx sys_statx 5
+384 i386 arch_prctl sys_arch_prctl compat_sys_arch_prctl 2
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 5aef183e2f85..4783ba204b8f 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -6,377 +6,377 @@
#
# The abi is "common", "64" or "x32" for this file.
#
-0 common read sys_read
-1 common write sys_write
-2 common open sys_open
-3 common close sys_close
-4 common stat sys_newstat
-5 common fstat sys_newfstat
-6 common lstat sys_newlstat
-7 common poll sys_poll
-8 common lseek sys_lseek
-9 common mmap sys_mmap
-10 common mprotect sys_mprotect
-11 common munmap sys_munmap
-12 common brk sys_brk
-13 64 rt_sigaction sys_rt_sigaction
-14 common rt_sigprocmask sys_rt_sigprocmask
-15 64 rt_sigreturn sys_rt_sigreturn/ptregs
-16 64 ioctl sys_ioctl
-17 common pread64 sys_pread64
-18 common pwrite64 sys_pwrite64
-19 64 readv sys_readv
-20 64 writev sys_writev
-21 common access sys_access
-22 common pipe sys_pipe
-23 common select sys_select
-24 common sched_yield sys_sched_yield
-25 common mremap sys_mremap
-26 common msync sys_msync
-27 common mincore sys_mincore
-28 common madvise sys_madvise
-29 common shmget sys_shmget
-30 common shmat sys_shmat
-31 common shmctl sys_shmctl
-32 common dup sys_dup
-33 common dup2 sys_dup2
-34 common pause sys_pause
-35 common nanosleep sys_nanosleep
-36 common getitimer sys_getitimer
-37 common alarm sys_alarm
-38 common setitimer sys_setitimer
-39 common getpid sys_getpid
-40 common sendfile sys_sendfile64
-41 common socket sys_socket
-42 common connect sys_connect
-43 common accept sys_accept
-44 common sendto sys_sendto
-45 64 recvfrom sys_recvfrom
-46 64 sendmsg sys_sendmsg
-47 64 recvmsg sys_recvmsg
-48 common shutdown sys_shutdown
-49 common bind sys_bind
-50 common listen sys_listen
-51 common getsockname sys_getsockname
-52 common getpeername sys_getpeername
-53 common socketpair sys_socketpair
-54 64 setsockopt sys_setsockopt
-55 64 getsockopt sys_getsockopt
-56 common clone sys_clone/ptregs
-57 common fork sys_fork/ptregs
-58 common vfork sys_vfork/ptregs
-59 64 execve sys_execve/ptregs
-60 common exit sys_exit
-61 common wait4 sys_wait4
-62 common kill sys_kill
-63 common uname sys_newuname
-64 common semget sys_semget
-65 common semop sys_semop
-66 common semctl sys_semctl
-67 common shmdt sys_shmdt
-68 common msgget sys_msgget
-69 common msgsnd sys_msgsnd
-70 common msgrcv sys_msgrcv
-71 common msgctl sys_msgctl
-72 common fcntl sys_fcntl
-73 common flock sys_flock
-74 common fsync sys_fsync
-75 common fdatasync sys_fdatasync
-76 common truncate sys_truncate
-77 common ftruncate sys_ftruncate
-78 common getdents sys_getdents
-79 common getcwd sys_getcwd
-80 common chdir sys_chdir
-81 common fchdir sys_fchdir
-82 common rename sys_rename
-83 common mkdir sys_mkdir
-84 common rmdir sys_rmdir
-85 common creat sys_creat
-86 common link sys_link
-87 common unlink sys_unlink
-88 common symlink sys_symlink
-89 common readlink sys_readlink
-90 common chmod sys_chmod
-91 common fchmod sys_fchmod
-92 common chown sys_chown
-93 common fchown sys_fchown
-94 common lchown sys_lchown
-95 common umask sys_umask
-96 common gettimeofday sys_gettimeofday
-97 common getrlimit sys_getrlimit
-98 common getrusage sys_getrusage
-99 common sysinfo sys_sysinfo
-100 common times sys_times
-101 64 ptrace sys_ptrace
-102 common getuid sys_getuid
-103 common syslog sys_syslog
-104 common getgid sys_getgid
-105 common setuid sys_setuid
-106 common setgid sys_setgid
-107 common geteuid sys_geteuid
-108 common getegid sys_getegid
-109 common setpgid sys_setpgid
-110 common getppid sys_getppid
-111 common getpgrp sys_getpgrp
-112 common setsid sys_setsid
-113 common setreuid sys_setreuid
-114 common setregid sys_setregid
-115 common getgroups sys_getgroups
-116 common setgroups sys_setgroups
-117 common setresuid sys_setresuid
-118 common getresuid sys_getresuid
-119 common setresgid sys_setresgid
-120 common getresgid sys_getresgid
-121 common getpgid sys_getpgid
-122 common setfsuid sys_setfsuid
-123 common setfsgid sys_setfsgid
-124 common getsid sys_getsid
-125 common capget sys_capget
-126 common capset sys_capset
-127 64 rt_sigpending sys_rt_sigpending
-128 64 rt_sigtimedwait sys_rt_sigtimedwait
-129 64 rt_sigqueueinfo sys_rt_sigqueueinfo
-130 common rt_sigsuspend sys_rt_sigsuspend
-131 64 sigaltstack sys_sigaltstack
-132 common utime sys_utime
-133 common mknod sys_mknod
+0 common read sys_read 3
+1 common write sys_write 3
+2 common open sys_open 3
+3 common close sys_close 1
+4 common stat sys_newstat 2
+5 common fstat sys_newfstat 2
+6 common lstat sys_newlstat 2
+7 common poll sys_poll 3
+8 common lseek sys_lseek 3
+9 common mmap sys_mmap 6
+10 common mprotect sys_mprotect 3
+11 common munmap sys_munmap 2
+12 common brk sys_brk 1
+13 64 rt_sigaction sys_rt_sigaction 5
+14 common rt_sigprocmask sys_rt_sigprocmask 4
+15 64 rt_sigreturn sys_rt_sigreturn/ptregs 0
+16 64 ioctl sys_ioctl 3
+17 common pread64 sys_pread64 4
+18 common pwrite64 sys_pwrite64 4
+19 64 readv sys_readv 3
+20 64 writev sys_writev 3
+21 common access sys_access 2
+22 common pipe sys_pipe 1
+23 common select sys_select 5
+24 common sched_yield sys_sched_yield 0
+25 common mremap sys_mremap 5
+26 common msync sys_msync 3
+27 common mincore sys_mincore 3
+28 common madvise sys_madvise 3
+29 common shmget sys_shmget 3
+30 common shmat sys_shmat 3
+31 common shmctl sys_shmctl 3
+32 common dup sys_dup 1
+33 common dup2 sys_dup2 2
+34 common pause sys_pause 0
+35 common nanosleep sys_nanosleep 2
+36 common getitimer sys_getitimer 2
+37 common alarm sys_alarm 1
+38 common setitimer sys_setitimer 3
+39 common getpid sys_getpid 0
+40 common sendfile sys_sendfile64 4
+41 common socket sys_socket 3
+42 common connect sys_connect 3
+43 common accept sys_accept 3
+44 common sendto sys_sendto 6
+45 64 recvfrom sys_recvfrom 6
+46 64 sendmsg sys_sendmsg 3
+47 64 recvmsg sys_recvmsg 3
+48 common shutdown sys_shutdown 2
+49 common bind sys_bind 3
+50 common listen sys_listen 2
+51 common getsockname sys_getsockname 3
+52 common getpeername sys_getpeername 3
+53 common socketpair sys_socketpair 4
+54 64 setsockopt sys_setsockopt 5
+55 64 getsockopt sys_getsockopt 5
+56 common clone sys_clone/ptregs 6
+57 common fork sys_fork/ptregs 0
+58 common vfork sys_vfork/ptregs 0
+59 64 execve sys_execve/ptregs 3
+60 common exit sys_exit 1
+61 common wait4 sys_wait4 4
+62 common kill sys_kill 2
+63 common uname sys_newuname 1
+64 common semget sys_semget 3
+65 common semop sys_semop 3
+66 common semctl sys_semctl 4
+67 common shmdt sys_shmdt 1
+68 common msgget sys_msgget 2
+69 common msgsnd sys_msgsnd 4
+70 common msgrcv sys_msgrcv 5
+71 common msgctl sys_msgctl 3
+72 common fcntl sys_fcntl 3
+73 common flock sys_flock 2
+74 common fsync sys_fsync 1
+75 common fdatasync sys_fdatasync 1
+76 common truncate sys_truncate 2
+77 common ftruncate sys_ftruncate 2
+78 common getdents sys_getdents 3
+79 common getcwd sys_getcwd 2
+80 common chdir sys_chdir 1
+81 common fchdir sys_fchdir 1
+82 common rename sys_rename 2
+83 common mkdir sys_mkdir 2
+84 common rmdir sys_rmdir 1
+85 common creat sys_creat 2
+86 common link sys_link 2
+87 common unlink sys_unlink 1
+88 common symlink sys_symlink 2
+89 common readlink sys_readlink 3
+90 common chmod sys_chmod 2
+91 common fchmod sys_fchmod 2
+92 common chown sys_chown 3
+93 common fchown sys_fchown 3
+94 common lchown sys_lchown 3
+95 common umask sys_umask 1
+96 common gettimeofday sys_gettimeofday 2
+97 common getrlimit sys_getrlimit 2
+98 common getrusage sys_getrusage 2
+99 common sysinfo sys_sysinfo 1
+100 common times sys_times 1
+101 64 ptrace sys_ptrace 4
+102 common getuid sys_getuid 0
+103 common syslog sys_syslog 3
+104 common getgid sys_getgid 0
+105 common setuid sys_setuid 1
+106 common setgid sys_setgid 1
+107 common geteuid sys_geteuid 0
+108 common getegid sys_getegid 0
+109 common setpgid sys_setpgid 2
+110 common getppid sys_getppid 0
+111 common getpgrp sys_getpgrp 0
+112 common setsid sys_setsid 0
+113 common setreuid sys_setreuid 2
+114 common setregid sys_setregid 2
+115 common getgroups sys_getgroups 2
+116 common setgroups sys_setgroups 2
+117 common setresuid sys_setresuid 3
+118 common getresuid sys_getresuid 3
+119 common setresgid sys_setresgid 3
+120 common getresgid sys_getresgid 3
+121 common getpgid sys_getpgid 1
+122 common setfsuid sys_setfsuid 1
+123 common setfsgid sys_setfsgid 1
+124 common getsid sys_getsid 1
+125 common capget sys_capget 2
+126 common capset sys_capset 2
+127 64 rt_sigpending sys_rt_sigpending 2
+128 64 rt_sigtimedwait sys_rt_sigtimedwait 4
+129 64 rt_sigqueueinfo sys_rt_sigqueueinfo 3
+130 common rt_sigsuspend sys_rt_sigsuspend 2
+131 64 sigaltstack sys_sigaltstack 2
+132 common utime sys_utime 2
+133 common mknod sys_mknod 3
134 64 uselib
-135 common personality sys_personality
-136 common ustat sys_ustat
-137 common statfs sys_statfs
-138 common fstatfs sys_fstatfs
-139 common sysfs sys_sysfs
-140 common getpriority sys_getpriority
-141 common setpriority sys_setpriority
-142 common sched_setparam sys_sched_setparam
-143 common sched_getparam sys_sched_getparam
-144 common sched_setscheduler sys_sched_setscheduler
-145 common sched_getscheduler sys_sched_getscheduler
-146 common sched_get_priority_max sys_sched_get_priority_max
-147 common sched_get_priority_min sys_sched_get_priority_min
-148 common sched_rr_get_interval sys_sched_rr_get_interval
-149 common mlock sys_mlock
-150 common munlock sys_munlock
-151 common mlockall sys_mlockall
-152 common munlockall sys_munlockall
-153 common vhangup sys_vhangup
-154 common modify_ldt sys_modify_ldt
-155 common pivot_root sys_pivot_root
-156 64 _sysctl sys_sysctl
-157 common prctl sys_prctl
-158 common arch_prctl sys_arch_prctl
-159 common adjtimex sys_adjtimex
-160 common setrlimit sys_setrlimit
-161 common chroot sys_chroot
-162 common sync sys_sync
-163 common acct sys_acct
-164 common settimeofday sys_settimeofday
-165 common mount sys_mount
-166 common umount2 sys_umount
-167 common swapon sys_swapon
-168 common swapoff sys_swapoff
-169 common reboot sys_reboot
-170 common sethostname sys_sethostname
-171 common setdomainname sys_setdomainname
-172 common iopl sys_iopl/ptregs
-173 common ioperm sys_ioperm
+135 common personality sys_personality 1
+136 common ustat sys_ustat 2
+137 common statfs sys_statfs 2
+138 common fstatfs sys_fstatfs 2
+139 common sysfs sys_sysfs 3
+140 common getpriority sys_getpriority 2
+141 common setpriority sys_setpriority 3
+142 common sched_setparam sys_sched_setparam 2
+143 common sched_getparam sys_sched_getparam 2
+144 common sched_setscheduler sys_sched_setscheduler 3
+145 common sched_getscheduler sys_sched_getscheduler 1
+146 common sched_get_priority_max sys_sched_get_priority_max 1
+147 common sched_get_priority_min sys_sched_get_priority_min 1
+148 common sched_rr_get_interval sys_sched_rr_get_interval 2
+149 common mlock sys_mlock 2
+150 common munlock sys_munlock 2
+151 common mlockall sys_mlockall 1
+152 common munlockall sys_munlockall 0
+153 common vhangup sys_vhangup 0
+154 common modify_ldt sys_modify_ldt 3
+155 common pivot_root sys_pivot_root 2
+156 64 _sysctl sys_sysctl 1
+157 common prctl sys_prctl 5
+158 common arch_prctl sys_arch_prctl 2
+159 common adjtimex sys_adjtimex 1
+160 common setrlimit sys_setrlimit 2
+161 common chroot sys_chroot 1
+162 common sync sys_sync 0
+163 common acct sys_acct 1
+164 common settimeofday sys_settimeofday 2
+165 common mount sys_mount 5
+166 common umount2 sys_umount 2
+167 common swapon sys_swapon 2
+168 common swapoff sys_swapoff 1
+169 common reboot sys_reboot 4
+170 common sethostname sys_sethostname 2
+171 common setdomainname sys_setdomainname 2
+172 common iopl sys_iopl/ptregs 1
+173 common ioperm sys_ioperm 3
174 64 create_module
-175 common init_module sys_init_module
-176 common delete_module sys_delete_module
+175 common init_module sys_init_module 3
+176 common delete_module sys_delete_module 2
177 64 get_kernel_syms
178 64 query_module
-179 common quotactl sys_quotactl
+179 common quotactl sys_quotactl 4
180 64 nfsservctl
181 common getpmsg
182 common putpmsg
183 common afs_syscall
184 common tuxcall
185 common security
-186 common gettid sys_gettid
-187 common readahead sys_readahead
-188 common setxattr sys_setxattr
-189 common lsetxattr sys_lsetxattr
-190 common fsetxattr sys_fsetxattr
-191 common getxattr sys_getxattr
-192 common lgetxattr sys_lgetxattr
-193 common fgetxattr sys_fgetxattr
-194 common listxattr sys_listxattr
-195 common llistxattr sys_llistxattr
-196 common flistxattr sys_flistxattr
-197 common removexattr sys_removexattr
-198 common lremovexattr sys_lremovexattr
-199 common fremovexattr sys_fremovexattr
-200 common tkill sys_tkill
-201 common time sys_time
-202 common futex sys_futex
-203 common sched_setaffinity sys_sched_setaffinity
-204 common sched_getaffinity sys_sched_getaffinity
+186 common gettid sys_gettid 0
+187 common readahead sys_readahead 3
+188 common setxattr sys_setxattr 5
+189 common lsetxattr sys_lsetxattr 5
+190 common fsetxattr sys_fsetxattr 5
+191 common getxattr sys_getxattr 4
+192 common lgetxattr sys_lgetxattr 4
+193 common fgetxattr sys_fgetxattr 4
+194 common listxattr sys_listxattr 3
+195 common llistxattr sys_llistxattr 3
+196 common flistxattr sys_flistxattr 3
+197 common removexattr sys_removexattr 2
+198 common lremovexattr sys_lremovexattr 2
+199 common fremovexattr sys_fremovexattr 2
+200 common tkill sys_tkill 2
+201 common time sys_time 1
+202 common futex sys_futex 6
+203 common sched_setaffinity sys_sched_setaffinity 3
+204 common sched_getaffinity sys_sched_getaffinity 3
205 64 set_thread_area
-206 64 io_setup sys_io_setup
-207 common io_destroy sys_io_destroy
-208 common io_getevents sys_io_getevents
-209 64 io_submit sys_io_submit
-210 common io_cancel sys_io_cancel
+206 64 io_setup sys_io_setup 2
+207 common io_destroy sys_io_destroy 1
+208 common io_getevents sys_io_getevents 5
+209 64 io_submit sys_io_submit 3
+210 common io_cancel sys_io_cancel 3
211 64 get_thread_area
-212 common lookup_dcookie sys_lookup_dcookie
-213 common epoll_create sys_epoll_create
+212 common lookup_dcookie sys_lookup_dcookie 3
+213 common epoll_create sys_epoll_create 1
214 64 epoll_ctl_old
215 64 epoll_wait_old
-216 common remap_file_pages sys_remap_file_pages
-217 common getdents64 sys_getdents64
-218 common set_tid_address sys_set_tid_address
-219 common restart_syscall sys_restart_syscall
-220 common semtimedop sys_semtimedop
-221 common fadvise64 sys_fadvise64
-222 64 timer_create sys_timer_create
-223 common timer_settime sys_timer_settime
-224 common timer_gettime sys_timer_gettime
-225 common timer_getoverrun sys_timer_getoverrun
-226 common timer_delete sys_timer_delete
-227 common clock_settime sys_clock_settime
-228 common clock_gettime sys_clock_gettime
-229 common clock_getres sys_clock_getres
-230 common clock_nanosleep sys_clock_nanosleep
-231 common exit_group sys_exit_group
-232 common epoll_wait sys_epoll_wait
-233 common epoll_ctl sys_epoll_ctl
-234 common tgkill sys_tgkill
-235 common utimes sys_utimes
+216 common remap_file_pages sys_remap_file_pages 5
+217 common getdents64 sys_getdents64 3
+218 common set_tid_address sys_set_tid_address 1
+219 common restart_syscall sys_restart_syscall 0
+220 common semtimedop sys_semtimedop 4
+221 common fadvise64 sys_fadvise64 4
+222 64 timer_create sys_timer_create 3
+223 common timer_settime sys_timer_settime 4
+224 common timer_gettime sys_timer_gettime 2
+225 common timer_getoverrun sys_timer_getoverrun 1
+226 common timer_delete sys_timer_delete 1
+227 common clock_settime sys_clock_settime 2
+228 common clock_gettime sys_clock_gettime 2
+229 common clock_getres sys_clock_getres 2
+230 common clock_nanosleep sys_clock_nanosleep 4
+231 common exit_group sys_exit_group 1
+232 common epoll_wait sys_epoll_wait 4
+233 common epoll_ctl sys_epoll_ctl 4
+234 common tgkill sys_tgkill 3
+235 common utimes sys_utimes 2
236 64 vserver
-237 common mbind sys_mbind
-238 common set_mempolicy sys_set_mempolicy
-239 common get_mempolicy sys_get_mempolicy
-240 common mq_open sys_mq_open
-241 common mq_unlink sys_mq_unlink
-242 common mq_timedsend sys_mq_timedsend
-243 common mq_timedreceive sys_mq_timedreceive
-244 64 mq_notify sys_mq_notify
-245 common mq_getsetattr sys_mq_getsetattr
-246 64 kexec_load sys_kexec_load
-247 64 waitid sys_waitid
-248 common add_key sys_add_key
-249 common request_key sys_request_key
-250 common keyctl sys_keyctl
-251 common ioprio_set sys_ioprio_set
-252 common ioprio_get sys_ioprio_get
-253 common inotify_init sys_inotify_init
-254 common inotify_add_watch sys_inotify_add_watch
-255 common inotify_rm_watch sys_inotify_rm_watch
-256 common migrate_pages sys_migrate_pages
-257 common openat sys_openat
-258 common mkdirat sys_mkdirat
-259 common mknodat sys_mknodat
-260 common fchownat sys_fchownat
-261 common futimesat sys_futimesat
-262 common newfstatat sys_newfstatat
-263 common unlinkat sys_unlinkat
-264 common renameat sys_renameat
-265 common linkat sys_linkat
-266 common symlinkat sys_symlinkat
-267 common readlinkat sys_readlinkat
-268 common fchmodat sys_fchmodat
-269 common faccessat sys_faccessat
-270 common pselect6 sys_pselect6
-271 common ppoll sys_ppoll
-272 common unshare sys_unshare
-273 64 set_robust_list sys_set_robust_list
-274 64 get_robust_list sys_get_robust_list
-275 common splice sys_splice
-276 common tee sys_tee
-277 common sync_file_range sys_sync_file_range
-278 64 vmsplice sys_vmsplice
-279 64 move_pages sys_move_pages
-280 common utimensat sys_utimensat
-281 common epoll_pwait sys_epoll_pwait
-282 common signalfd sys_signalfd
-283 common timerfd_create sys_timerfd_create
-284 common eventfd sys_eventfd
-285 common fallocate sys_fallocate
-286 common timerfd_settime sys_timerfd_settime
-287 common timerfd_gettime sys_timerfd_gettime
-288 common accept4 sys_accept4
-289 common signalfd4 sys_signalfd4
-290 common eventfd2 sys_eventfd2
-291 common epoll_create1 sys_epoll_create1
-292 common dup3 sys_dup3
-293 common pipe2 sys_pipe2
-294 common inotify_init1 sys_inotify_init1
-295 64 preadv sys_preadv
-296 64 pwritev sys_pwritev
-297 64 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo
-298 common perf_event_open sys_perf_event_open
-299 64 recvmmsg sys_recvmmsg
-300 common fanotify_init sys_fanotify_init
-301 common fanotify_mark sys_fanotify_mark
-302 common prlimit64 sys_prlimit64
-303 common name_to_handle_at sys_name_to_handle_at
-304 common open_by_handle_at sys_open_by_handle_at
-305 common clock_adjtime sys_clock_adjtime
-306 common syncfs sys_syncfs
-307 64 sendmmsg sys_sendmmsg
-308 common setns sys_setns
-309 common getcpu sys_getcpu
-310 64 process_vm_readv sys_process_vm_readv
-311 64 process_vm_writev sys_process_vm_writev
-312 common kcmp sys_kcmp
-313 common finit_module sys_finit_module
-314 common sched_setattr sys_sched_setattr
-315 common sched_getattr sys_sched_getattr
-316 common renameat2 sys_renameat2
-317 common seccomp sys_seccomp
-318 common getrandom sys_getrandom
-319 common memfd_create sys_memfd_create
-320 common kexec_file_load sys_kexec_file_load
-321 common bpf sys_bpf
-322 64 execveat sys_execveat/ptregs
-323 common userfaultfd sys_userfaultfd
-324 common membarrier sys_membarrier
-325 common mlock2 sys_mlock2
-326 common copy_file_range sys_copy_file_range
-327 64 preadv2 sys_preadv2
-328 64 pwritev2 sys_pwritev2
-329 common pkey_mprotect sys_pkey_mprotect
-330 common pkey_alloc sys_pkey_alloc
-331 common pkey_free sys_pkey_free
-332 common statx sys_statx
+237 common mbind sys_mbind 6
+238 common set_mempolicy sys_set_mempolicy 3
+239 common get_mempolicy sys_get_mempolicy 5
+240 common mq_open sys_mq_open 4
+241 common mq_unlink sys_mq_unlink 1
+242 common mq_timedsend sys_mq_timedsend 5
+243 common mq_timedreceive sys_mq_timedreceive 5
+244 64 mq_notify sys_mq_notify 2
+245 common mq_getsetattr sys_mq_getsetattr 3
+246 64 kexec_load sys_kexec_load 4
+247 64 waitid sys_waitid 5
+248 common add_key sys_add_key 5
+249 common request_key sys_request_key 4
+250 common keyctl sys_keyctl 5
+251 common ioprio_set sys_ioprio_set 3
+252 common ioprio_get sys_ioprio_get 2
+253 common inotify_init sys_inotify_init 1
+254 common inotify_add_watch sys_inotify_add_watch 3
+255 common inotify_rm_watch sys_inotify_rm_watch 2
+256 common migrate_pages sys_migrate_pages 4
+257 common openat sys_openat 4
+258 common mkdirat sys_mkdirat 3
+259 common mknodat sys_mknodat 4
+260 common fchownat sys_fchownat 5
+261 common futimesat sys_futimesat 3
+262 common newfstatat sys_newfstatat 4
+263 common unlinkat sys_unlinkat 3
+264 common renameat sys_renameat 4
+265 common linkat sys_linkat 5
+266 common symlinkat sys_symlinkat 3
+267 common readlinkat sys_readlinkat 4
+268 common fchmodat sys_fchmodat 3
+269 common faccessat sys_faccessat 3
+270 common pselect6 sys_pselect6 6
+271 common ppoll sys_ppoll 5
+272 common unshare sys_unshare 1
+273 64 set_robust_list sys_set_robust_list 2
+274 64 get_robust_list sys_get_robust_list 3
+275 common splice sys_splice 6
+276 common tee sys_tee 4
+277 common sync_file_range sys_sync_file_range 4
+278 64 vmsplice sys_vmsplice 4
+279 64 move_pages sys_move_pages 6
+280 common utimensat sys_utimensat 4
+281 common epoll_pwait sys_epoll_pwait 6
+282 common signalfd sys_signalfd 3
+283 common timerfd_create sys_timerfd_create 2
+284 common eventfd sys_eventfd 1
+285 common fallocate sys_fallocate 4
+286 common timerfd_settime sys_timerfd_settime 4
+287 common timerfd_gettime sys_timerfd_gettime 2
+288 common accept4 sys_accept4 4
+289 common signalfd4 sys_signalfd4 4
+290 common eventfd2 sys_eventfd2 2
+291 common epoll_create1 sys_epoll_create1 1
+292 common dup3 sys_dup3 3
+293 common pipe2 sys_pipe2 2
+294 common inotify_init1 sys_inotify_init1 1
+295 64 preadv sys_preadv 5
+296 64 pwritev sys_pwritev 5
+297 64 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo 4
+298 common perf_event_open sys_perf_event_open 5
+299 64 recvmmsg sys_recvmmsg 5
+300 common fanotify_init sys_fanotify_init 2
+301 common fanotify_mark sys_fanotify_mark 5
+302 common prlimit64 sys_prlimit64 4
+303 common name_to_handle_at sys_name_to_handle_at 5
+304 common open_by_handle_at sys_open_by_handle_at 3
+305 common clock_adjtime sys_clock_adjtime 2
+306 common syncfs sys_syncfs 1
+307 64 sendmmsg sys_sendmmsg 4
+308 common setns sys_setns 2
+309 common getcpu sys_getcpu 3
+310 64 process_vm_readv sys_process_vm_readv 6
+311 64 process_vm_writev sys_process_vm_writev 6
+312 common kcmp sys_kcmp 5
+313 common finit_module sys_finit_module 3
+314 common sched_setattr sys_sched_setattr 3
+315 common sched_getattr sys_sched_getattr 4
+316 common renameat2 sys_renameat2 5
+317 common seccomp sys_seccomp 3
+318 common getrandom sys_getrandom 3
+319 common memfd_create sys_memfd_create 2
+320 common kexec_file_load sys_kexec_file_load 5
+321 common bpf sys_bpf 3
+322 64 execveat sys_execveat/ptregs 5
+323 common userfaultfd sys_userfaultfd 1
+324 common membarrier sys_membarrier 2
+325 common mlock2 sys_mlock2 3
+326 common copy_file_range sys_copy_file_range 6
+327 64 preadv2 sys_preadv2 6
+328 64 pwritev2 sys_pwritev2 6
+329 common pkey_mprotect sys_pkey_mprotect 4
+330 common pkey_alloc sys_pkey_alloc 2
+331 common pkey_free sys_pkey_free 1
+332 common statx sys_statx 5
#
# x32-specific system call numbers start at 512 to avoid cache impact
# for native 64-bit operation.
#
-512 x32 rt_sigaction compat_sys_rt_sigaction
-513 x32 rt_sigreturn sys32_x32_rt_sigreturn
-514 x32 ioctl compat_sys_ioctl
-515 x32 readv compat_sys_readv
-516 x32 writev compat_sys_writev
-517 x32 recvfrom compat_sys_recvfrom
-518 x32 sendmsg compat_sys_sendmsg
-519 x32 recvmsg compat_sys_recvmsg
-520 x32 execve compat_sys_execve/ptregs
-521 x32 ptrace compat_sys_ptrace
-522 x32 rt_sigpending compat_sys_rt_sigpending
-523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait
-524 x32 rt_sigqueueinfo compat_sys_rt_sigqueueinfo
-525 x32 sigaltstack compat_sys_sigaltstack
-526 x32 timer_create compat_sys_timer_create
-527 x32 mq_notify compat_sys_mq_notify
-528 x32 kexec_load compat_sys_kexec_load
-529 x32 waitid compat_sys_waitid
-530 x32 set_robust_list compat_sys_set_robust_list
-531 x32 get_robust_list compat_sys_get_robust_list
-532 x32 vmsplice compat_sys_vmsplice
-533 x32 move_pages compat_sys_move_pages
-534 x32 preadv compat_sys_preadv64
-535 x32 pwritev compat_sys_pwritev64
-536 x32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo
-537 x32 recvmmsg compat_sys_recvmmsg
-538 x32 sendmmsg compat_sys_sendmmsg
-539 x32 process_vm_readv compat_sys_process_vm_readv
-540 x32 process_vm_writev compat_sys_process_vm_writev
-541 x32 setsockopt compat_sys_setsockopt
-542 x32 getsockopt compat_sys_getsockopt
-543 x32 io_setup compat_sys_io_setup
-544 x32 io_submit compat_sys_io_submit
-545 x32 execveat compat_sys_execveat/ptregs
-546 x32 preadv2 compat_sys_preadv64v2
-547 x32 pwritev2 compat_sys_pwritev64v2
+512 x32 rt_sigaction compat_sys_rt_sigaction 5
+513 x32 rt_sigreturn sys32_x32_rt_sigreturn 0
+514 x32 ioctl compat_sys_ioctl 3
+515 x32 readv compat_sys_readv 3
+516 x32 writev compat_sys_writev 3
+517 x32 recvfrom compat_sys_recvfrom 6
+518 x32 sendmsg compat_sys_sendmsg 3
+519 x32 recvmsg compat_sys_recvmsg 3
+520 x32 execve compat_sys_execve/ptregs 3
+521 x32 ptrace compat_sys_ptrace 4
+522 x32 rt_sigpending compat_sys_rt_sigpending 2
+523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait 4
+524 x32 rt_sigqueueinfo compat_sys_rt_sigqueueinfo 3
+525 x32 sigaltstack compat_sys_sigaltstack 2
+526 x32 timer_create compat_sys_timer_create 3
+527 x32 mq_notify compat_sys_mq_notify 2
+528 x32 kexec_load compat_sys_kexec_load 4
+529 x32 waitid compat_sys_waitid 5
+530 x32 set_robust_list compat_sys_set_robust_list 2
+531 x32 get_robust_list compat_sys_get_robust_list 3
+532 x32 vmsplice compat_sys_vmsplice 4
+533 x32 move_pages compat_sys_move_pages 6
+534 x32 preadv compat_sys_preadv64 5
+535 x32 pwritev compat_sys_pwritev64 5
+536 x32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo 4
+537 x32 recvmmsg compat_sys_recvmmsg 5
+538 x32 sendmmsg compat_sys_sendmmsg 4
+539 x32 process_vm_readv compat_sys_process_vm_readv 6
+540 x32 process_vm_writev compat_sys_process_vm_writev 6
+541 x32 setsockopt compat_sys_setsockopt 5
+542 x32 getsockopt compat_sys_getsockopt 5
+543 x32 io_setup compat_sys_io_setup 2
+544 x32 io_submit compat_sys_io_submit 3
+545 x32 execveat compat_sys_execveat/ptregs 5
+546 x32 preadv2 compat_sys_preadv64v2 6
+547 x32 pwritev2 compat_sys_pwritev64v2 6
diff --git a/arch/x86/entry/syscalls/syscalltbl.sh b/arch/x86/entry/syscalls/syscalltbl.sh
index d71ef4bd3615..bb8a12f32610 100644
--- a/arch/x86/entry/syscalls/syscalltbl.sh
+++ b/arch/x86/entry/syscalls/syscalltbl.sh
@@ -8,6 +8,7 @@ syscall_macro() {
abi="$1"
nr="$2"
entry="$3"
+ num="$4"
# Entry can be either just a function name or "function/qualifier"
real_entry="${entry%%/*}"
@@ -47,7 +48,11 @@ emit() {
}
grep '^[0-9]' "$in" | sort -n | (
- while read nr abi name entry compat; do
+ while read nr abi name entry compat num; do
+ case "$compat" in
+ [0-9]*) num="$compat" ; compat="" ;
+ esac
+
abi=`echo "$abi" | tr '[a-z]' '[A-Z]'`
if [ "$abi" = "COMMON" -o "$abi" = "64" ]; then
# COMMON is the same as 64, except that we don't expect X32
--
2.14.3
From: Andi Kleen <[email protected]>
Clear all registers for compat calls on 64bit kernels. All arguments
are initially passed through the stack, so this is fairly simple
without additional stubs.
Signed-off-by: Andi Kleen <[email protected]>
---
arch/x86/entry/entry_64_compat.S | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 98d5358e4041..16fd2643a77f 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -95,6 +95,8 @@ ENTRY(entry_SYSENTER_compat)
pushq $0 /* pt_regs->r14 = 0 */
pushq $0 /* pt_regs->r15 = 0 */
cld
+ /* Can clear all because arguments are passed through the stack */
+ CLEAR_ALL_REGS
/*
* SYSENTER doesn't filter flags, so we need to clear NT and AC
@@ -223,6 +225,8 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
pushq $0 /* pt_regs->r13 = 0 */
pushq $0 /* pt_regs->r14 = 0 */
pushq $0 /* pt_regs->r15 = 0 */
+ /* Can clear all because arguments are passed through the stack */
+ CLEAR_ALL_REGS
/*
* User mode is traced as though IRQs are on, and SYSENTER
@@ -348,6 +352,8 @@ ENTRY(entry_INT80_compat)
pushq %r14 /* pt_regs->r14 */
pushq %r15 /* pt_regs->r15 */
cld
+ /* Can clear all because arguments are passed through the stack */
+ CLEAR_ALL_REGS
/*
* User mode is traced as though IRQs are on, and the interrupt
--
2.14.3
From: Andi Kleen <[email protected]>
Clear all registers on entering the 64bit kernel for exceptions and
interrupts.
Since there are no arguments this is fairly simple.
Signed-off-by: Andi Kleen <[email protected]>
---
arch/x86/entry/entry_64.S | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 632081fd7086..6ab4c2aaeabb 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -636,6 +636,7 @@ END(irq_entries_start)
ALLOC_PT_GPREGS_ON_STACK
SAVE_C_REGS
SAVE_EXTRA_REGS
+ CLEAR_ALL_REGS
ENCODE_FRAME_POINTER
testb $3, CS(%rsp)
@@ -1192,6 +1193,7 @@ ENTRY(xen_failsafe_callback)
ALLOC_PT_GPREGS_ON_STACK
SAVE_C_REGS
SAVE_EXTRA_REGS
+ CLEAR_ALL_REGS
ENCODE_FRAME_POINTER
jmp error_exit
END(xen_failsafe_callback)
@@ -1237,6 +1239,7 @@ ENTRY(paranoid_entry)
cld
SAVE_C_REGS 8
SAVE_EXTRA_REGS 8
+ CLEAR_ALL_REGS
ENCODE_FRAME_POINTER 8
movl $1, %ebx
movl $MSR_GS_BASE, %ecx
@@ -1289,6 +1292,7 @@ ENTRY(error_entry)
cld
SAVE_C_REGS 8
SAVE_EXTRA_REGS 8
+ CLEAR_ALL_REGS
ENCODE_FRAME_POINTER 8
xorl %ebx, %ebx
testb $3, CS+8(%rsp)
@@ -1487,6 +1491,7 @@ ENTRY(nmi)
pushq %r14 /* pt_regs->r14 */
pushq %r15 /* pt_regs->r15 */
UNWIND_HINT_REGS
+ CLEAR_ALL_REGS
ENCODE_FRAME_POINTER
/*
--
2.14.3
From: Andi Kleen <[email protected]>
On a 32bit kernel clearing registers is much simpler than
on 64bit. The arguments for syscalls are initially passed
to a C function through the stack, so there's no need
to figure out how many arguments to clear.
So we always clear all registers (except frame pointer) for
all entry points.
Signed-off-by: Andi Kleen <[email protected]>
---
arch/x86/entry/entry_32.S | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index d2ef7f32905b..aee1085534ac 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -221,6 +221,18 @@
POP_GS_EX
.endm
+.macro CLEAR_ALL_REGS
+#ifndef CONFIG_FRAME_POINTER /* keep %ebp when it is the frame pointer; otherwise it still holds the interrupted context's value */
+ xorl %ebp, %ebp
+#endif
+ xorl %eax, %eax /* xorl also clobbers EFLAGS; callers invoke this after the register save and re-load what they need */
+ xorl %ebx, %ebx
+ xorl %ecx, %ecx
+ xorl %edx, %edx
+ xorl %edi, %edi
+ xorl %esi, %esi
+.endm
+
/*
* %eax: prev task
* %edx: next task
@@ -428,6 +440,7 @@ ENTRY(entry_SYSENTER_32)
pushl $0 /* pt_regs->ip = 0 (placeholder) */
pushl %eax /* pt_regs->orig_ax */
SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */
+ CLEAR_ALL_REGS
/*
* SYSENTER doesn't filter flags, so we need to clear NT, AC
@@ -539,6 +552,7 @@ ENTRY(entry_INT80_32)
ASM_CLAC
pushl %eax /* pt_regs->orig_ax */
SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */
+ CLEAR_ALL_REGS
/*
* User mode is traced as though IRQs are on, and the interrupt gate
@@ -673,6 +687,7 @@ common_interrupt:
ASM_CLAC
addl $-0x80, (%esp) /* Adjust vector into the [-256, -1] range */
SAVE_ALL
+ CLEAR_ALL_REGS
ENCODE_FRAME_POINTER
TRACE_IRQS_OFF
movl %esp, %eax
@@ -685,6 +700,7 @@ ENTRY(name) \
ASM_CLAC; \
pushl $~(nr); \
SAVE_ALL; \
+ CLEAR_ALL_REGS; \
ENCODE_FRAME_POINTER; \
TRACE_IRQS_OFF \
movl %esp, %eax; \
@@ -812,6 +828,7 @@ END(spurious_interrupt_bug)
ENTRY(xen_hypervisor_callback)
pushl $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL
+ CLEAR_ALL_REGS
ENCODE_FRAME_POINTER
TRACE_IRQS_OFF
@@ -867,6 +884,7 @@ ENTRY(xen_failsafe_callback)
jmp iret_exc
5: pushl $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL
+ CLEAR_ALL_REGS
ENCODE_FRAME_POINTER
jmp ret_from_exception
@@ -921,6 +939,7 @@ common_exception:
pushl %edx
pushl %ecx
pushl %ebx
+ CLEAR_ALL_REGS
ENCODE_FRAME_POINTER
cld
movl $(__KERNEL_PERCPU), %ecx
@@ -954,6 +973,7 @@ ENTRY(debug)
ASM_CLAC
pushl $-1 # mark this as an int
SAVE_ALL
+ CLEAR_ALL_REGS
ENCODE_FRAME_POINTER
xorl %edx, %edx # error code 0
movl %esp, %eax # pt_regs pointer
@@ -998,6 +1018,7 @@ ENTRY(nmi)
pushl %eax # pt_regs->orig_ax
SAVE_ALL
+ CLEAR_ALL_REGS
ENCODE_FRAME_POINTER
xorl %edx, %edx # zero error code
movl %esp, %eax # pt_regs pointer
@@ -1038,6 +1059,7 @@ ENTRY(nmi)
.endr
pushl %eax
SAVE_ALL
+ CLEAR_ALL_REGS
ENCODE_FRAME_POINTER
FIXUP_ESPFIX_STACK # %eax == %esp
xorl %edx, %edx # zero error code
@@ -1052,6 +1074,7 @@ ENTRY(int3)
ASM_CLAC
pushl $-1 # mark this as an int
SAVE_ALL
+ CLEAR_ALL_REGS
ENCODE_FRAME_POINTER
TRACE_IRQS_OFF
xorl %edx, %edx # zero error code
--
2.14.3
From: Andi Kleen <[email protected]>
We clear all the non argument registers for 64bit SYSCALLs
to minimize any risk of bad speculation using user values.
So far unused argument registers still leak. To be addressed
in future patches.
Signed-off-by: Andi Kleen <[email protected]>
---
arch/x86/entry/entry_64.S | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index bbdfbdd817d6..632081fd7086 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -236,6 +236,14 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
pushq %r11 /* pt_regs->r11 */
sub $(6*8), %rsp
SAVE_EXTRA_REGS
+ /* Sanitize registers against speculation attacks */
+ /* r10 is cleared later, arguments are handled in san_args* */
+ CLEAR_R11_TO_R15
+#ifndef CONFIG_FRAME_POINTER
+ xor %ebp, %ebp
+#endif
+ xor %ebx, %ebx
+ xor %ecx, %ecx
UNWIND_HINT_REGS extra=0
@@ -263,6 +271,7 @@ entry_SYSCALL_64_fastpath:
#endif
ja 1f /* return -ENOSYS (already in pt_regs->ax) */
movq %r10, %rcx
+ xor %r10, %r10
#ifdef CONFIG_RETPOLINE
movq sys_call_table(, %rax, 8), %rax
--
2.14.3
> On Jan 9, 2018, at 5:03 PM, Andi Kleen <[email protected]> wrote:
>
> This patch kit implements clearing of all unused registers on kernel entries,
> including system calls and all exceptions and interrupt.
>
> This doesn't fix any known issue, but will make it harder in general
> to exploit the kernel with speculation because it will be harder
> to get user controlled values into kernel code.
I don't like this at all. Once upon a time, Linux syscalls were supposed to be fast. Then we learned about the Meltdown screwup, so we mostly fixed it for real upstream and the distros seriously half-arsed their own fixes [1]. This came with a big performance cost, but it can be turned off on non-busted hardware. So be it.
But now we're proposing to throw out the whole fast path because it might make it a bit harder to do the most obvious attack. Not very hard, mind you, but a little bit harder. And there's no off switch for less-leaky hardware. No thanks.
Meanwhile we're doing nothing whatsoever to mitigate cross-process attacks because we can't do anything about it short of turning IBRS on systemwide.
> On Jan 9, 2018, at 5:03 PM, Andi Kleen <[email protected]> wrote:
>
> From: Andi Kleen <[email protected]>
>
> Clear all registers on entering the 64bit kernel for exceptions and
> interrupts.
>
> Since there are no arguments this is fairly simple.
>
> Signed-off-by: Andi Kleen <[email protected]>
> ---
> arch/x86/entry/entry_64.S | 5 +++++
> 1 file changed, 5 insertions(+)
>
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index 632081fd7086..6ab4c2aaeabb 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -636,6 +636,7 @@ END(irq_entries_start)
> ALLOC_PT_GPREGS_ON_STACK
> SAVE_C_REGS
> SAVE_EXTRA_REGS
> + CLEAR_ALL_REGS
> ENCODE_FRAME_POINTER
>
> testb $3, CS(%rsp)
> @@ -1192,6 +1193,7 @@ ENTRY(xen_failsafe_callback)
> ALLOC_PT_GPREGS_ON_STACK
> SAVE_C_REGS
> SAVE_EXTRA_REGS
> + CLEAR_ALL_REGS
> ENCODE_FRAME_POINTER
If CLEAR_ALL_REGS does what it sounds like, then it's overkill here.
I could get behind this patch in general, though. Interrupts are so slow that the overhead probably doesn't matter.
> On Jan 9, 2018, at 5:03 PM, Andi Kleen <[email protected]> wrote:
>
> From: Andi Kleen <[email protected]>
>
> On a 32bit kernel clearing registers is much simpler than
> on 64bit. The arguments for syscalls are initially passed
> to a C function through the stack, so there's no need
> to figure out how many arguments to clear.
Why are we even trying to improve the situation on 32-bit? Unless someone actually tries to implement PTI, this seems useless.
> On Jan 9, 2018, at 5:03 PM, Andi Kleen <[email protected]> wrote:
>
> From: Andi Kleen <[email protected]>
>
> In order to sanitize the system call arguments properly
> we need to know the number of syscall arguments for each
> syscall. Add a new column to the 32bit and 64bit syscall
> tables to list the number of arguments.
>
Surely we can do this in the SYSCALL_DEFINE macros. Or at least statically check it.
Also, what attack are we protecting against anyway?
> I don't like this at all. Once upon a time, Linux syscalls were supposed to be fast. Then we learned about the Meltdown screwup, so we mostly fixed it for real upstream and the distroa seriously half-arsed their own fixes [1]. This came with a big performance cost, but it can be turned off on non-busted hardware. So be it.
That's true, but modern CPUs are also a lot faster/wider than the K8
the fast path was originally designed for. A modern CPU can go through
these instructions really fast with a very high IPC because they don't have
dependencies or stalls.
So it shouldn't hurt very much.
Also, when the fast path was originally written, the ABI still had a
different caller/callee split, which made the fast path more beneficial. Later on
it lost some of its benefits and became less of a win.
> But now we're proposing to throw out the whole fast path because it might make it a bit harder to do the most obvious attack. Not very hard, mind you, but a little bit harder. And there's no off switch for less-leaky hardware. No thanks.
Well the off switch is a fast CPU.
-Andi
On Jan 9, 2018, at 5:34 PM, Andi Kleen <[email protected]> wrote:
>> I don't like this at all. Once upon a time, Linux syscalls were supposed to be fast. Then we learned about the Meltdown screwup, so we mostly fixed it for real upstream and the distroa seriously half-arsed their own fixes [1]. This came with a big performance cost, but it can be turned off on non-busted hardware. So be it.
>
> That's true, but modern CPUs are also a lot faster/wider than the K8
> the fast path was originally designed for. A modern CPU can go through
> these instructions really fast with a very high IPC because they don't have
> dependencies or stalls.
>
> So it shouldn't hurt very much.
>
> Also in fact when the fast path was originally written the ABI still had a
> different caller/callee split which made it more better. Later on
> it already lost some of its benefits and was less of a win.
>
>> But now we're proposing to throw out the whole fast path because it might make it a bit harder to do the most obvious attack. Not very hard, mind you, but a little bit harder. And there's no off switch for less-leaky hardware. No thanks.
>
> Well the off switch is a fast CPU.
When I rewrote the fast path, I did it on SNB. Not much has changed.
This patch should come with benchmarks (with PTI off).
And Intel needs to come up with real fixes for this stuff.
On Tue, Jan 9, 2018 at 8:03 PM, Andi Kleen <[email protected]> wrote:
> From: Andi Kleen <[email protected]>
>
> Remove the partial stack frame in the 64bit syscall fast path.
> In the next patch we want to clear the extra registers, which requires
> to always save all registers. So remove the partial stack frame
> in the syscall fast path and always save everything.
>
> This actually simplifies the code because the ptregs stubs
> are not needed anymore.
>
> arch/x86/entry/entry_64.S | 57 ++++-----------------------------------------------------
> arch/x86/entry/syscall_64.c | 2 +-
>
> Signed-off-by: Andi Kleen <[email protected]>
> ---
> arch/x86/entry/entry_64.S | 57 ++++-----------------------------------------
> arch/x86/entry/syscall_64.c | 2 +-
> 2 files changed, 5 insertions(+), 54 deletions(-)
>
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index 58dbf7a12a05..bbdfbdd817d6 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -234,7 +234,9 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
> pushq %r9 /* pt_regs->r9 */
> pushq %r10 /* pt_regs->r10 */
> pushq %r11 /* pt_regs->r11 */
> - sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
> + sub $(6*8), %rsp
> + SAVE_EXTRA_REGS
> +
Continue using pushes here
> UNWIND_HINT_REGS extra=0
>
> /*
> @@ -262,11 +264,6 @@ entry_SYSCALL_64_fastpath:
> ja 1f /* return -ENOSYS (already in pt_regs->ax) */
> movq %r10, %rcx
>
> - /*
> - * This call instruction is handled specially in stub_ptregs_64.
> - * It might end up jumping to the slow path. If it jumps, RAX
> - * and all argument registers are clobbered.
> - */
> #ifdef CONFIG_RETPOLINE
> movq sys_call_table(, %rax, 8), %rax
> call __x86_indirect_thunk_rax
> @@ -293,9 +290,7 @@ entry_SYSCALL_64_fastpath:
> TRACE_IRQS_ON /* user mode is traced as IRQs on */
> movq RIP(%rsp), %rcx
> movq EFLAGS(%rsp), %r11
> - addq $6*8, %rsp /* skip extra regs -- they were preserved */
> - UNWIND_HINT_EMPTY
> - jmp .Lpop_c_regs_except_rcx_r11_and_sysret
> + jmp syscall_return_via_sysret
>
> 1:
> /*
> @@ -305,14 +300,12 @@ entry_SYSCALL_64_fastpath:
> */
> TRACE_IRQS_ON
> ENABLE_INTERRUPTS(CLBR_ANY)
> - SAVE_EXTRA_REGS
> movq %rsp, %rdi
> call syscall_return_slowpath /* returns with IRQs disabled */
> jmp return_from_SYSCALL_64
>
> entry_SYSCALL64_slow_path:
> /* IRQs are off. */
> - SAVE_EXTRA_REGS
> movq %rsp, %rdi
> call do_syscall_64 /* returns with IRQs disabled */
>
> @@ -389,7 +382,6 @@ syscall_return_via_sysret:
> /* rcx and r11 are already restored (see code above) */
> UNWIND_HINT_EMPTY
> POP_EXTRA_REGS
> -.Lpop_c_regs_except_rcx_r11_and_sysret:
> popq %rsi /* skip r11 */
> popq %r10
> popq %r9
> @@ -420,47 +412,6 @@ syscall_return_via_sysret:
> USERGS_SYSRET64
> END(entry_SYSCALL_64)
>
> -ENTRY(stub_ptregs_64)
> - /*
> - * Syscalls marked as needing ptregs land here.
> - * If we are on the fast path, we need to save the extra regs,
> - * which we achieve by trying again on the slow path. If we are on
> - * the slow path, the extra regs are already saved.
> - *
> - * RAX stores a pointer to the C function implementing the syscall.
> - * IRQs are on.
> - */
> - cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp)
> - jne 1f
> -
> - /*
> - * Called from fast path -- disable IRQs again, pop return address
> - * and jump to slow path
> - */
> - DISABLE_INTERRUPTS(CLBR_ANY)
> - TRACE_IRQS_OFF
> - popq %rax
> - UNWIND_HINT_REGS extra=0
> - jmp entry_SYSCALL64_slow_path
> -
> -1:
> - JMP_NOSPEC %rax /* Called from C */
> -END(stub_ptregs_64)
> -
> -.macro ptregs_stub func
> -ENTRY(ptregs_\func)
> - UNWIND_HINT_FUNC
> - leaq \func(%rip), %rax
> - jmp stub_ptregs_64
> -END(ptregs_\func)
> -.endm
> -
> -/* Instantiate ptregs_stub for each ptregs-using syscall */
> -#define __SYSCALL_64_QUAL_(sym)
> -#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym
> -#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
> -#include <asm/syscalls_64.h>
> -
You can't just blindly remove this. We need to make sure that
syscalls that modify registers take the slow path exit, because they
may change the registers to be incompatible with SYSRET.
--
Brian Gerst
On Tue, Jan 09, 2018 at 05:26:43PM -0800, Andy Lutomirski wrote:
>
>
> > On Jan 9, 2018, at 5:03 PM, Andi Kleen <[email protected]> wrote:
> >
> > From: Andi Kleen <[email protected]>
> >
> > In order to sanitize the system call arguments properly
> > we need to know the number of syscall arguments for each
> > syscall. Add a new column to the 32bit and 64bit syscall
> > tables to list the number of arguments.
> >
>
> Surely we can do this in the SYSCALL_DEFINE macros. Or at least statically check it.
Possibly. The assembler would be much uglier as inline assembler though.
And adding the number shouldn't be a big burden when adding a system call.
I don't know how to check statically.
>
> Also, what attack are we protecting against anyway?
There's no specific attack here.
But the idea is to make it harder to inject values into the kernel to abuse
with speculation.
-Andi
On Tue, Jan 9, 2018 at 8:37 PM, Andi Kleen <[email protected]> wrote:
> On Tue, Jan 09, 2018 at 05:26:43PM -0800, Andy Lutomirski wrote:
>>
>>
>> > On Jan 9, 2018, at 5:03 PM, Andi Kleen <[email protected]> wrote:
>> >
>> > From: Andi Kleen <[email protected]>
>> >
>> > In order to sanitize the system call arguments properly
>> > we need to know the number of syscall arguments for each
>> > syscall. Add a new column to the 32bit and 64bit syscall
>> > tables to list the number of arguments.
>> >
>>
>> Surely we can do this in the SYSCALL_DEFINE macros. Or at least statically check it.
>
> Possibly. The assembler would be much uglier as inline assembler though.
> And adding the number shouldn't be a big burden when adding a system call.
>
> I don't know how to check statically.
>
Somehow parse out the SYSCALL_DEFINE() macros at build time and check
the numbers. Or munge the number into the SyS_ wrapper so we'd have
SyS0_fork but SyS3_read.
>>
>> Also, what attack are we protecting against anyway?
>
> There's no specific attack here.
>
> But the idea is to make it harder to inject values into the kernel to abuse
> with speculation.
I think a bit stronger justification would be good here.
On Tue, Jan 09, 2018 at 09:46:16PM -0500, Brian Gerst wrote:
> On Tue, Jan 9, 2018 at 8:03 PM, Andi Kleen <[email protected]> wrote:
> > From: Andi Kleen <[email protected]>
> >
> > Remove the partial stack frame in the 64bit syscall fast path.
> > In the next patch we want to clear the extra registers, which requires
> > to always save all registers. So remove the partial stack frame
> > in the syscall fast path and always save everything.
> >
> > This actually simplifies the code because the ptregs stubs
> > are not needed anymore.
> >
> > arch/x86/entry/entry_64.S | 57 ++++-----------------------------------------------------
> > arch/x86/entry/syscall_64.c | 2 +-
> >
> > Signed-off-by: Andi Kleen <[email protected]>
> > ---
> > arch/x86/entry/entry_64.S | 57 ++++-----------------------------------------
> > arch/x86/entry/syscall_64.c | 2 +-
> > 2 files changed, 5 insertions(+), 54 deletions(-)
> >
> > diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> > index 58dbf7a12a05..bbdfbdd817d6 100644
> > --- a/arch/x86/entry/entry_64.S
> > +++ b/arch/x86/entry/entry_64.S
> > @@ -234,7 +234,9 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
> > pushq %r9 /* pt_regs->r9 */
> > pushq %r10 /* pt_regs->r10 */
> > pushq %r11 /* pt_regs->r11 */
> > - sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
> > + sub $(6*8), %rsp
> > + SAVE_EXTRA_REGS
> > +
>
> Continue using pushes here
>
> > UNWIND_HINT_REGS extra=0
> >
> > /*
> > @@ -262,11 +264,6 @@ entry_SYSCALL_64_fastpath:
> > ja 1f /* return -ENOSYS (already in pt_regs->ax) */
> > movq %r10, %rcx
> >
> > - /*
> > - * This call instruction is handled specially in stub_ptregs_64.
> > - * It might end up jumping to the slow path. If it jumps, RAX
> > - * and all argument registers are clobbered.
> > - */
> > #ifdef CONFIG_RETPOLINE
> > movq sys_call_table(, %rax, 8), %rax
> > call __x86_indirect_thunk_rax
> > @@ -293,9 +290,7 @@ entry_SYSCALL_64_fastpath:
> > TRACE_IRQS_ON /* user mode is traced as IRQs on */
> > movq RIP(%rsp), %rcx
> > movq EFLAGS(%rsp), %r11
> > - addq $6*8, %rsp /* skip extra regs -- they were preserved */
> > - UNWIND_HINT_EMPTY
> > - jmp .Lpop_c_regs_except_rcx_r11_and_sysret
> > + jmp syscall_return_via_sysret
> >
> > 1:
> > /*
> > @@ -305,14 +300,12 @@ entry_SYSCALL_64_fastpath:
> > */
> > TRACE_IRQS_ON
> > ENABLE_INTERRUPTS(CLBR_ANY)
> > - SAVE_EXTRA_REGS
> > movq %rsp, %rdi
> > call syscall_return_slowpath /* returns with IRQs disabled */
> > jmp return_from_SYSCALL_64
> >
> > entry_SYSCALL64_slow_path:
> > /* IRQs are off. */
> > - SAVE_EXTRA_REGS
> > movq %rsp, %rdi
> > call do_syscall_64 /* returns with IRQs disabled */
> >
> > @@ -389,7 +382,6 @@ syscall_return_via_sysret:
> > /* rcx and r11 are already restored (see code above) */
> > UNWIND_HINT_EMPTY
> > POP_EXTRA_REGS
> > -.Lpop_c_regs_except_rcx_r11_and_sysret:
> > popq %rsi /* skip r11 */
> > popq %r10
> > popq %r9
> > @@ -420,47 +412,6 @@ syscall_return_via_sysret:
> > USERGS_SYSRET64
> > END(entry_SYSCALL_64)
> >
> > -ENTRY(stub_ptregs_64)
> > - /*
> > - * Syscalls marked as needing ptregs land here.
> > - * If we are on the fast path, we need to save the extra regs,
> > - * which we achieve by trying again on the slow path. If we are on
> > - * the slow path, the extra regs are already saved.
> > - *
> > - * RAX stores a pointer to the C function implementing the syscall.
> > - * IRQs are on.
> > - */
> > - cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp)
> > - jne 1f
> > -
> > - /*
> > - * Called from fast path -- disable IRQs again, pop return address
> > - * and jump to slow path
> > - */
> > - DISABLE_INTERRUPTS(CLBR_ANY)
> > - TRACE_IRQS_OFF
> > - popq %rax
> > - UNWIND_HINT_REGS extra=0
> > - jmp entry_SYSCALL64_slow_path
> > -
> > -1:
> > - JMP_NOSPEC %rax /* Called from C */
> > -END(stub_ptregs_64)
> > -
> > -.macro ptregs_stub func
> > -ENTRY(ptregs_\func)
> > - UNWIND_HINT_FUNC
> > - leaq \func(%rip), %rax
> > - jmp stub_ptregs_64
> > -END(ptregs_\func)
> > -.endm
> > -
> > -/* Instantiate ptregs_stub for each ptregs-using syscall */
> > -#define __SYSCALL_64_QUAL_(sym)
> > -#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym
> > -#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
> > -#include <asm/syscalls_64.h>
> > -
>
> You can't just blindly remove this. We need to make sure that
> syscalls that modify registers take the slow path exit, because they
> may change the registers to be incompatible with SYSRET.
That's a good point. I checked the ptregs calls:
iopl: should be fine, we will be restoring the correct IOPL through
SYSRET
clone/fork: fine too, the original return is fine and ret_from_fork
takes care of the child
execve et al.: we will be leaking r11 (rflags) and rcx (orig return) into
the new process, but that seems acceptable.
rt_sigreturn: that's the only one that has problems. I added a new
TIF_FULL_RESTORE to force it into the slow path.
-Andi
On Wed, Jan 10, 2018 at 7:16 PM, Andi Kleen <[email protected]> wrote:
> On Tue, Jan 09, 2018 at 09:46:16PM -0500, Brian Gerst wrote:
>> On Tue, Jan 9, 2018 at 8:03 PM, Andi Kleen <[email protected]> wrote:
>> > From: Andi Kleen <[email protected]>
>> >
>> > Remove the partial stack frame in the 64bit syscall fast path.
>> > In the next patch we want to clear the extra registers, which requires
>> > to always save all registers. So remove the partial stack frame
>> > in the syscall fast path and always save everything.
>> >
>> > This actually simplifies the code because the ptregs stubs
>> > are not needed anymore.
>> >
>> > arch/x86/entry/entry_64.S | 57 ++++-----------------------------------------------------
>> > arch/x86/entry/syscall_64.c | 2 +-
>> >
>> > Signed-off-by: Andi Kleen <[email protected]>
>> > ---
>> > arch/x86/entry/entry_64.S | 57 ++++-----------------------------------------
>> > arch/x86/entry/syscall_64.c | 2 +-
>> > 2 files changed, 5 insertions(+), 54 deletions(-)
>> >
>> > diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
>> > index 58dbf7a12a05..bbdfbdd817d6 100644
>> > --- a/arch/x86/entry/entry_64.S
>> > +++ b/arch/x86/entry/entry_64.S
>> > @@ -234,7 +234,9 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
>> > pushq %r9 /* pt_regs->r9 */
>> > pushq %r10 /* pt_regs->r10 */
>> > pushq %r11 /* pt_regs->r11 */
>> > - sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
>> > + sub $(6*8), %rsp
>> > + SAVE_EXTRA_REGS
>> > +
>>
>> Continue using pushes here
>>
>> > UNWIND_HINT_REGS extra=0
>> >
>> > /*
>> > @@ -262,11 +264,6 @@ entry_SYSCALL_64_fastpath:
>> > ja 1f /* return -ENOSYS (already in pt_regs->ax) */
>> > movq %r10, %rcx
>> >
>> > - /*
>> > - * This call instruction is handled specially in stub_ptregs_64.
>> > - * It might end up jumping to the slow path. If it jumps, RAX
>> > - * and all argument registers are clobbered.
>> > - */
>> > #ifdef CONFIG_RETPOLINE
>> > movq sys_call_table(, %rax, 8), %rax
>> > call __x86_indirect_thunk_rax
>> > @@ -293,9 +290,7 @@ entry_SYSCALL_64_fastpath:
>> > TRACE_IRQS_ON /* user mode is traced as IRQs on */
>> > movq RIP(%rsp), %rcx
>> > movq EFLAGS(%rsp), %r11
>> > - addq $6*8, %rsp /* skip extra regs -- they were preserved */
>> > - UNWIND_HINT_EMPTY
>> > - jmp .Lpop_c_regs_except_rcx_r11_and_sysret
>> > + jmp syscall_return_via_sysret
>> >
>> > 1:
>> > /*
>> > @@ -305,14 +300,12 @@ entry_SYSCALL_64_fastpath:
>> > */
>> > TRACE_IRQS_ON
>> > ENABLE_INTERRUPTS(CLBR_ANY)
>> > - SAVE_EXTRA_REGS
>> > movq %rsp, %rdi
>> > call syscall_return_slowpath /* returns with IRQs disabled */
>> > jmp return_from_SYSCALL_64
>> >
>> > entry_SYSCALL64_slow_path:
>> > /* IRQs are off. */
>> > - SAVE_EXTRA_REGS
>> > movq %rsp, %rdi
>> > call do_syscall_64 /* returns with IRQs disabled */
>> >
>> > @@ -389,7 +382,6 @@ syscall_return_via_sysret:
>> > /* rcx and r11 are already restored (see code above) */
>> > UNWIND_HINT_EMPTY
>> > POP_EXTRA_REGS
>> > -.Lpop_c_regs_except_rcx_r11_and_sysret:
>> > popq %rsi /* skip r11 */
>> > popq %r10
>> > popq %r9
>> > @@ -420,47 +412,6 @@ syscall_return_via_sysret:
>> > USERGS_SYSRET64
>> > END(entry_SYSCALL_64)
>> >
>> > -ENTRY(stub_ptregs_64)
>> > - /*
>> > - * Syscalls marked as needing ptregs land here.
>> > - * If we are on the fast path, we need to save the extra regs,
>> > - * which we achieve by trying again on the slow path. If we are on
>> > - * the slow path, the extra regs are already saved.
>> > - *
>> > - * RAX stores a pointer to the C function implementing the syscall.
>> > - * IRQs are on.
>> > - */
>> > - cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp)
>> > - jne 1f
>> > -
>> > - /*
>> > - * Called from fast path -- disable IRQs again, pop return address
>> > - * and jump to slow path
>> > - */
>> > - DISABLE_INTERRUPTS(CLBR_ANY)
>> > - TRACE_IRQS_OFF
>> > - popq %rax
>> > - UNWIND_HINT_REGS extra=0
>> > - jmp entry_SYSCALL64_slow_path
>> > -
>> > -1:
>> > - JMP_NOSPEC %rax /* Called from C */
>> > -END(stub_ptregs_64)
>> > -
>> > -.macro ptregs_stub func
>> > -ENTRY(ptregs_\func)
>> > - UNWIND_HINT_FUNC
>> > - leaq \func(%rip), %rax
>> > - jmp stub_ptregs_64
>> > -END(ptregs_\func)
>> > -.endm
>> > -
>> > -/* Instantiate ptregs_stub for each ptregs-using syscall */
>> > -#define __SYSCALL_64_QUAL_(sym)
>> > -#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym
>> > -#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
>> > -#include <asm/syscalls_64.h>
>> > -
>>
>> You can't just blindly remove this. We need to make sure that
>> syscalls that modify registers take the slow path exit, because they
>> may change the registers to be incompatible with SYSRET.
>
> That's a good point. I checked the ptregs calls:
>
> iopl: should be fine, we will be restoring the correct IOPL through
> SYSRET
> clone/fork: fine too, the original return is fine and ret_from_fork
> takes care of the child
> execve et.al.: we will be leaking r11(rflags), rcx(orig return) into
> the new process. but that seems acceptable.
We still need to check if we are loading a 32-bit binary. That must
return with IRET.
> rt_sigreturn: that's the only one who has problems. I added a new
> TIF_FULL_RESTORE to force it into the slow path.
--
Brian Gerst
> On Jan 10, 2018, at 4:16 PM, Andi Kleen <[email protected]> wrote:
>
>> On Tue, Jan 09, 2018 at 09:46:16PM -0500, Brian Gerst wrote:
>>> On Tue, Jan 9, 2018 at 8:03 PM, Andi Kleen <[email protected]> wrote:
>>> From: Andi Kleen <[email protected]>
>>>
>>> Remove the partial stack frame in the 64bit syscall fast path.
>>> In the next patch we want to clear the extra registers, which requires
>>> to always save all registers. So remove the partial stack frame
>>> in the syscall fast path and always save everything.
>>>
>>> This actually simplifies the code because the ptregs stubs
>>> are not needed anymore.
>>>
>>> arch/x86/entry/entry_64.S | 57 ++++-----------------------------------------------------
>>> arch/x86/entry/syscall_64.c | 2 +-
>>>
>>> Signed-off-by: Andi Kleen <[email protected]>
>>> ---
>>> arch/x86/entry/entry_64.S | 57 ++++-----------------------------------------
>>> arch/x86/entry/syscall_64.c | 2 +-
>>> 2 files changed, 5 insertions(+), 54 deletions(-)
>>>
>>> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
>>> index 58dbf7a12a05..bbdfbdd817d6 100644
>>> --- a/arch/x86/entry/entry_64.S
>>> +++ b/arch/x86/entry/entry_64.S
>>> @@ -234,7 +234,9 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
>>> pushq %r9 /* pt_regs->r9 */
>>> pushq %r10 /* pt_regs->r10 */
>>> pushq %r11 /* pt_regs->r11 */
>>> - sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
>>> + sub $(6*8), %rsp
>>> + SAVE_EXTRA_REGS
>>> +
>>
>> Continue using pushes here
>>
>>> UNWIND_HINT_REGS extra=0
>>>
>>> /*
>>> @@ -262,11 +264,6 @@ entry_SYSCALL_64_fastpath:
>>> ja 1f /* return -ENOSYS (already in pt_regs->ax) */
>>> movq %r10, %rcx
>>>
>>> - /*
>>> - * This call instruction is handled specially in stub_ptregs_64.
>>> - * It might end up jumping to the slow path. If it jumps, RAX
>>> - * and all argument registers are clobbered.
>>> - */
>>> #ifdef CONFIG_RETPOLINE
>>> movq sys_call_table(, %rax, 8), %rax
>>> call __x86_indirect_thunk_rax
>>> @@ -293,9 +290,7 @@ entry_SYSCALL_64_fastpath:
>>> TRACE_IRQS_ON /* user mode is traced as IRQs on */
>>> movq RIP(%rsp), %rcx
>>> movq EFLAGS(%rsp), %r11
>>> - addq $6*8, %rsp /* skip extra regs -- they were preserved */
>>> - UNWIND_HINT_EMPTY
>>> - jmp .Lpop_c_regs_except_rcx_r11_and_sysret
>>> + jmp syscall_return_via_sysret
>>>
>>> 1:
>>> /*
>>> @@ -305,14 +300,12 @@ entry_SYSCALL_64_fastpath:
>>> */
>>> TRACE_IRQS_ON
>>> ENABLE_INTERRUPTS(CLBR_ANY)
>>> - SAVE_EXTRA_REGS
>>> movq %rsp, %rdi
>>> call syscall_return_slowpath /* returns with IRQs disabled */
>>> jmp return_from_SYSCALL_64
>>>
>>> entry_SYSCALL64_slow_path:
>>> /* IRQs are off. */
>>> - SAVE_EXTRA_REGS
>>> movq %rsp, %rdi
>>> call do_syscall_64 /* returns with IRQs disabled */
>>>
>>> @@ -389,7 +382,6 @@ syscall_return_via_sysret:
>>> /* rcx and r11 are already restored (see code above) */
>>> UNWIND_HINT_EMPTY
>>> POP_EXTRA_REGS
>>> -.Lpop_c_regs_except_rcx_r11_and_sysret:
>>> popq %rsi /* skip r11 */
>>> popq %r10
>>> popq %r9
>>> @@ -420,47 +412,6 @@ syscall_return_via_sysret:
>>> USERGS_SYSRET64
>>> END(entry_SYSCALL_64)
>>>
>>> -ENTRY(stub_ptregs_64)
>>> - /*
>>> - * Syscalls marked as needing ptregs land here.
>>> - * If we are on the fast path, we need to save the extra regs,
>>> - * which we achieve by trying again on the slow path. If we are on
>>> - * the slow path, the extra regs are already saved.
>>> - *
>>> - * RAX stores a pointer to the C function implementing the syscall.
>>> - * IRQs are on.
>>> - */
>>> - cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp)
>>> - jne 1f
>>> -
>>> - /*
>>> - * Called from fast path -- disable IRQs again, pop return address
>>> - * and jump to slow path
>>> - */
>>> - DISABLE_INTERRUPTS(CLBR_ANY)
>>> - TRACE_IRQS_OFF
>>> - popq %rax
>>> - UNWIND_HINT_REGS extra=0
>>> - jmp entry_SYSCALL64_slow_path
>>> -
>>> -1:
>>> - JMP_NOSPEC %rax /* Called from C */
>>> -END(stub_ptregs_64)
>>> -
>>> -.macro ptregs_stub func
>>> -ENTRY(ptregs_\func)
>>> - UNWIND_HINT_FUNC
>>> - leaq \func(%rip), %rax
>>> - jmp stub_ptregs_64
>>> -END(ptregs_\func)
>>> -.endm
>>> -
>>> -/* Instantiate ptregs_stub for each ptregs-using syscall */
>>> -#define __SYSCALL_64_QUAL_(sym)
>>> -#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym
>>> -#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
>>> -#include <asm/syscalls_64.h>
>>> -
>>
>> You can't just blindly remove this. We need to make sure that
>> syscalls that modify registers take the slow path exit, because they
>> may change the registers to be incompatible with SYSRET.
>
> That's a good point. I checked the ptregs calls:
>
> iopl: should be fine, we will be restoring the correct IOPL through
> SYSRET
>
> clone/fork: fine too, the original return is fine and ret_from_fork
> takes care of the child
>
> execve et.al.: we will be leaking r11(rflags), rcx(orig return) into
> the new process. but that seems acceptable.
>
> rt_sigreturn: that's the only one who has problems. I added a new
> TIF_FULL_RESTORE to force it into the slow path.
>
So your series removes the old declarative annotation and then will add a new TI flag to make it work again?
This whole thing seems to be at the wrong end of the cost benefit curve.
> So your series removes the old declarative annotation and then will add a new TI flag to make it work again?
The flag is a lot simpler than the previous assembler mess.
I thought the general trend in entry* was to move assembler to C?
>
> This whole thing seems to be at the wrong end of the cost benefit curve.
But if you prefer assembler mess we can go back to it, no problem.
-Andi
On Wed, Jan 10, 2018 at 7:55 PM, Andy Lutomirski <[email protected]> wrote:
>
>
>> On Jan 10, 2018, at 4:16 PM, Andi Kleen <[email protected]> wrote:
>>
>>> On Tue, Jan 09, 2018 at 09:46:16PM -0500, Brian Gerst wrote:
>>>> On Tue, Jan 9, 2018 at 8:03 PM, Andi Kleen <[email protected]> wrote:
>>>> From: Andi Kleen <[email protected]>
>>>>
>>>> Remove the partial stack frame in the 64bit syscall fast path.
>>>> In the next patch we want to clear the extra registers, which requires
>>>> to always save all registers. So remove the partial stack frame
>>>> in the syscall fast path and always save everything.
>>>>
>>>> This actually simplifies the code because the ptregs stubs
>>>> are not needed anymore.
>>>>
>>>> arch/x86/entry/entry_64.S | 57 ++++-----------------------------------------------------
>>>> arch/x86/entry/syscall_64.c | 2 +-
>>>>
>>>> Signed-off-by: Andi Kleen <[email protected]>
>>>> ---
>>>> arch/x86/entry/entry_64.S | 57 ++++-----------------------------------------
>>>> arch/x86/entry/syscall_64.c | 2 +-
>>>> 2 files changed, 5 insertions(+), 54 deletions(-)
>>>>
>>>> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
>>>> index 58dbf7a12a05..bbdfbdd817d6 100644
>>>> --- a/arch/x86/entry/entry_64.S
>>>> +++ b/arch/x86/entry/entry_64.S
>>>> @@ -234,7 +234,9 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
>>>> pushq %r9 /* pt_regs->r9 */
>>>> pushq %r10 /* pt_regs->r10 */
>>>> pushq %r11 /* pt_regs->r11 */
>>>> - sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
>>>> + sub $(6*8), %rsp
>>>> + SAVE_EXTRA_REGS
>>>> +
>>>
>>> Continue using pushes here
>>>
>>>> UNWIND_HINT_REGS extra=0
>>>>
>>>> /*
>>>> @@ -262,11 +264,6 @@ entry_SYSCALL_64_fastpath:
>>>> ja 1f /* return -ENOSYS (already in pt_regs->ax) */
>>>> movq %r10, %rcx
>>>>
>>>> - /*
>>>> - * This call instruction is handled specially in stub_ptregs_64.
>>>> - * It might end up jumping to the slow path. If it jumps, RAX
>>>> - * and all argument registers are clobbered.
>>>> - */
>>>> #ifdef CONFIG_RETPOLINE
>>>> movq sys_call_table(, %rax, 8), %rax
>>>> call __x86_indirect_thunk_rax
>>>> @@ -293,9 +290,7 @@ entry_SYSCALL_64_fastpath:
>>>> TRACE_IRQS_ON /* user mode is traced as IRQs on */
>>>> movq RIP(%rsp), %rcx
>>>> movq EFLAGS(%rsp), %r11
>>>> - addq $6*8, %rsp /* skip extra regs -- they were preserved */
>>>> - UNWIND_HINT_EMPTY
>>>> - jmp .Lpop_c_regs_except_rcx_r11_and_sysret
>>>> + jmp syscall_return_via_sysret
>>>>
>>>> 1:
>>>> /*
>>>> @@ -305,14 +300,12 @@ entry_SYSCALL_64_fastpath:
>>>> */
>>>> TRACE_IRQS_ON
>>>> ENABLE_INTERRUPTS(CLBR_ANY)
>>>> - SAVE_EXTRA_REGS
>>>> movq %rsp, %rdi
>>>> call syscall_return_slowpath /* returns with IRQs disabled */
>>>> jmp return_from_SYSCALL_64
>>>>
>>>> entry_SYSCALL64_slow_path:
>>>> /* IRQs are off. */
>>>> - SAVE_EXTRA_REGS
>>>> movq %rsp, %rdi
>>>> call do_syscall_64 /* returns with IRQs disabled */
>>>>
>>>> @@ -389,7 +382,6 @@ syscall_return_via_sysret:
>>>> /* rcx and r11 are already restored (see code above) */
>>>> UNWIND_HINT_EMPTY
>>>> POP_EXTRA_REGS
>>>> -.Lpop_c_regs_except_rcx_r11_and_sysret:
>>>> popq %rsi /* skip r11 */
>>>> popq %r10
>>>> popq %r9
>>>> @@ -420,47 +412,6 @@ syscall_return_via_sysret:
>>>> USERGS_SYSRET64
>>>> END(entry_SYSCALL_64)
>>>>
>>>> -ENTRY(stub_ptregs_64)
>>>> - /*
>>>> - * Syscalls marked as needing ptregs land here.
>>>> - * If we are on the fast path, we need to save the extra regs,
>>>> - * which we achieve by trying again on the slow path. If we are on
>>>> - * the slow path, the extra regs are already saved.
>>>> - *
>>>> - * RAX stores a pointer to the C function implementing the syscall.
>>>> - * IRQs are on.
>>>> - */
>>>> - cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp)
>>>> - jne 1f
>>>> -
>>>> - /*
>>>> - * Called from fast path -- disable IRQs again, pop return address
>>>> - * and jump to slow path
>>>> - */
>>>> - DISABLE_INTERRUPTS(CLBR_ANY)
>>>> - TRACE_IRQS_OFF
>>>> - popq %rax
>>>> - UNWIND_HINT_REGS extra=0
>>>> - jmp entry_SYSCALL64_slow_path
>>>> -
>>>> -1:
>>>> - JMP_NOSPEC %rax /* Called from C */
>>>> -END(stub_ptregs_64)
>>>> -
>>>> -.macro ptregs_stub func
>>>> -ENTRY(ptregs_\func)
>>>> - UNWIND_HINT_FUNC
>>>> - leaq \func(%rip), %rax
>>>> - jmp stub_ptregs_64
>>>> -END(ptregs_\func)
>>>> -.endm
>>>> -
>>>> -/* Instantiate ptregs_stub for each ptregs-using syscall */
>>>> -#define __SYSCALL_64_QUAL_(sym)
>>>> -#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym
>>>> -#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
>>>> -#include <asm/syscalls_64.h>
>>>> -
>>>
>>> You can't just blindly remove this. We need to make sure that
>>> syscalls that modify registers take the slow path exit, because they
>>> may change the registers to be incompatible with SYSRET.
>>
>> That's a good point. I checked the ptregs calls:
>>
>> iopl: should be fine, we will be restoring the correct IOPL through
>> SYSRET
>>
>> clone/fork: fine too, the original return is fine and ret_from_fork
>> takes care of the child
>>
>> execve et.al.: we will be leaking r11(rflags), rcx(orig return) into
>> the new process. but that seems acceptable.
>>
>> rt_sigreturn: that's the only one who has problems. I added a new
>> TIF_FULL_RESTORE to force it into the slow path.
>>
>
> So your series removes the old declarative annotation and then will add a new TI flag to make it work again?
>
> This whole thing seems to be at the wrong end of the cost benefit curve.
We already check TIF flags after the syscall on the fast path. Adding
another bit to the mask costs nothing.
--
Brian Gerst
> > execve et.al.: we will be leaking r11(rflags), rcx(orig return) into
> > the new process. but that seems acceptable.
>
> We still need to check if we are loading a 32-bit binary. That must
> return with IRET.
True. Will fix.
-Andi
> On Jan 10, 2018, at 5:01 PM, Brian Gerst <[email protected]> wrote:
>
>> On Wed, Jan 10, 2018 at 7:55 PM, Andy Lutomirski <[email protected]> wrote:
>>
>>
>>>> On Jan 10, 2018, at 4:16 PM, Andi Kleen <[email protected]> wrote:
>>>>
>>>>> On Tue, Jan 09, 2018 at 09:46:16PM -0500, Brian Gerst wrote:
>>>>> On Tue, Jan 9, 2018 at 8:03 PM, Andi Kleen <[email protected]> wrote:
>>>>> From: Andi Kleen <[email protected]>
>>>>>
>>>>> Remove the partial stack frame in the 64bit syscall fast path.
>>>>> In the next patch we want to clear the extra registers, which requires
>>>>> to always save all registers. So remove the partial stack frame
>>>>> in the syscall fast path and always save everything.
>>>>>
>>>>> This actually simplifies the code because the ptregs stubs
>>>>> are not needed anymore.
>>>>>
>>>>> arch/x86/entry/entry_64.S | 57 ++++-----------------------------------------------------
>>>>> arch/x86/entry/syscall_64.c | 2 +-
>>>>>
>>>>> Signed-off-by: Andi Kleen <[email protected]>
>>>>> ---
>>>>> arch/x86/entry/entry_64.S | 57 ++++-----------------------------------------
>>>>> arch/x86/entry/syscall_64.c | 2 +-
>>>>> 2 files changed, 5 insertions(+), 54 deletions(-)
>>>>>
>>>>> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
>>>>> index 58dbf7a12a05..bbdfbdd817d6 100644
>>>>> --- a/arch/x86/entry/entry_64.S
>>>>> +++ b/arch/x86/entry/entry_64.S
>>>>> @@ -234,7 +234,9 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
>>>>> pushq %r9 /* pt_regs->r9 */
>>>>> pushq %r10 /* pt_regs->r10 */
>>>>> pushq %r11 /* pt_regs->r11 */
>>>>> - sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
>>>>> + sub $(6*8), %rsp
>>>>> + SAVE_EXTRA_REGS
>>>>> +
>>>>
>>>> Continue using pushes here
>>>>
>>>>> UNWIND_HINT_REGS extra=0
>>>>>
>>>>> /*
>>>>> @@ -262,11 +264,6 @@ entry_SYSCALL_64_fastpath:
>>>>> ja 1f /* return -ENOSYS (already in pt_regs->ax) */
>>>>> movq %r10, %rcx
>>>>>
>>>>> - /*
>>>>> - * This call instruction is handled specially in stub_ptregs_64.
>>>>> - * It might end up jumping to the slow path. If it jumps, RAX
>>>>> - * and all argument registers are clobbered.
>>>>> - */
>>>>> #ifdef CONFIG_RETPOLINE
>>>>> movq sys_call_table(, %rax, 8), %rax
>>>>> call __x86_indirect_thunk_rax
>>>>> @@ -293,9 +290,7 @@ entry_SYSCALL_64_fastpath:
>>>>> TRACE_IRQS_ON /* user mode is traced as IRQs on */
>>>>> movq RIP(%rsp), %rcx
>>>>> movq EFLAGS(%rsp), %r11
>>>>> - addq $6*8, %rsp /* skip extra regs -- they were preserved */
>>>>> - UNWIND_HINT_EMPTY
>>>>> - jmp .Lpop_c_regs_except_rcx_r11_and_sysret
>>>>> + jmp syscall_return_via_sysret
>>>>>
>>>>> 1:
>>>>> /*
>>>>> @@ -305,14 +300,12 @@ entry_SYSCALL_64_fastpath:
>>>>> */
>>>>> TRACE_IRQS_ON
>>>>> ENABLE_INTERRUPTS(CLBR_ANY)
>>>>> - SAVE_EXTRA_REGS
>>>>> movq %rsp, %rdi
>>>>> call syscall_return_slowpath /* returns with IRQs disabled */
>>>>> jmp return_from_SYSCALL_64
>>>>>
>>>>> entry_SYSCALL64_slow_path:
>>>>> /* IRQs are off. */
>>>>> - SAVE_EXTRA_REGS
>>>>> movq %rsp, %rdi
>>>>> call do_syscall_64 /* returns with IRQs disabled */
>>>>>
>>>>> @@ -389,7 +382,6 @@ syscall_return_via_sysret:
>>>>> /* rcx and r11 are already restored (see code above) */
>>>>> UNWIND_HINT_EMPTY
>>>>> POP_EXTRA_REGS
>>>>> -.Lpop_c_regs_except_rcx_r11_and_sysret:
>>>>> popq %rsi /* skip r11 */
>>>>> popq %r10
>>>>> popq %r9
>>>>> @@ -420,47 +412,6 @@ syscall_return_via_sysret:
>>>>> USERGS_SYSRET64
>>>>> END(entry_SYSCALL_64)
>>>>>
>>>>> -ENTRY(stub_ptregs_64)
>>>>> - /*
>>>>> - * Syscalls marked as needing ptregs land here.
>>>>> - * If we are on the fast path, we need to save the extra regs,
>>>>> - * which we achieve by trying again on the slow path. If we are on
>>>>> - * the slow path, the extra regs are already saved.
>>>>> - *
>>>>> - * RAX stores a pointer to the C function implementing the syscall.
>>>>> - * IRQs are on.
>>>>> - */
>>>>> - cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp)
>>>>> - jne 1f
>>>>> -
>>>>> - /*
>>>>> - * Called from fast path -- disable IRQs again, pop return address
>>>>> - * and jump to slow path
>>>>> - */
>>>>> - DISABLE_INTERRUPTS(CLBR_ANY)
>>>>> - TRACE_IRQS_OFF
>>>>> - popq %rax
>>>>> - UNWIND_HINT_REGS extra=0
>>>>> - jmp entry_SYSCALL64_slow_path
>>>>> -
>>>>> -1:
>>>>> - JMP_NOSPEC %rax /* Called from C */
>>>>> -END(stub_ptregs_64)
>>>>> -
>>>>> -.macro ptregs_stub func
>>>>> -ENTRY(ptregs_\func)
>>>>> - UNWIND_HINT_FUNC
>>>>> - leaq \func(%rip), %rax
>>>>> - jmp stub_ptregs_64
>>>>> -END(ptregs_\func)
>>>>> -.endm
>>>>> -
>>>>> -/* Instantiate ptregs_stub for each ptregs-using syscall */
>>>>> -#define __SYSCALL_64_QUAL_(sym)
>>>>> -#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym
>>>>> -#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
>>>>> -#include <asm/syscalls_64.h>
>>>>> -
>>>>
>>>> You can't just blindly remove this. We need to make sure that
>>>> syscalls that modify registers take the slow path exit, because they
>>>> may change the registers to be incompatible with SYSRET.
>>>
>>> That's a good point. I checked the ptregs calls:
>>>
>>> iopl: should be fine, we will be restoring the correct IOPL through
>>> SYSRET
>>>
>>> clone/fork: fine too, the original return is fine and ret_from_fork
>>> takes care of the child
>>>
>>> execve et.al.: we will be leaking r11(rflags), rcx(orig return) into
>>> the new process. but that seems acceptable.
>>>
>>> rt_sigreturn: that's the only one who has problems. I added a new
>>> TIF_FULL_RESTORE to force it into the slow path.
>>>
>>
>> So your series removes the old declarative annotation and then will add a new TI flag to make it work again?
>>
>> This whole thing seems to be at the wrong end of the cost benefit curve.
>
> We already check TIF flags after the syscall on the fast path. Adding
> another bit to the mask costs nothing.
>
What I mean is: this whole series is almost certainly a performance regression, it has no off switch, and it doesn't obviously solve any problem. It didn't qualify as a so. And no one has benchmarked it. I think we should seriously consider just not applying it.
> What I mean is: this whole series is almost certainly a performance regression, it has no off switch, and is doesn't obviously solve any problem. It' didn't qualify as a so. And no one has benchmarked it. I think we should seriously consider just not applying it.
Well it's kernel hardening to guard against possible future speculation
attacks. Linus discussed it here for example:
https://www.mail-archive.com/[email protected]/msg1580667.html
For the on/off switch I can add a CONFIG to enable it, even though
it seems somewhat silly.
-Andi
On Tue, Jan 09, 2018 at 05:03:21PM -0800, Andi Kleen wrote:
> From: Andi Kleen <[email protected]>
>
> Remove the partial stack frame in the 64bit syscall fast path.
> In the next patch we want to clear the extra registers, which requires
> to always save all registers. So remove the partial stack frame
> in the syscall fast path and always save everything.
>
> This actually simplifies the code because the ptregs stubs
> are not needed anymore.
>
> arch/x86/entry/entry_64.S | 57 ++++-----------------------------------------------------
> arch/x86/entry/syscall_64.c | 2 +-
>
> Signed-off-by: Andi Kleen <[email protected]>
> ---
> arch/x86/entry/entry_64.S | 57 ++++-----------------------------------------
> arch/x86/entry/syscall_64.c | 2 +-
> 2 files changed, 5 insertions(+), 54 deletions(-)
>
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index 58dbf7a12a05..bbdfbdd817d6 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -234,7 +234,9 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
> pushq %r9 /* pt_regs->r9 */
> pushq %r10 /* pt_regs->r10 */
> pushq %r11 /* pt_regs->r11 */
> - sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
> + sub $(6*8), %rsp
> + SAVE_EXTRA_REGS
> +
> UNWIND_HINT_REGS extra=0
Now that the extra regs are being saved, the "extra=0" can be removed
from the unwind hint.
--
Josh
On Tue, Jan 9, 2018 at 8:03 PM, Andi Kleen <[email protected]> wrote:
> From: Andi Kleen <[email protected]>
>
> We clear all the non argument registers for 64bit SYSCALLs
> to minimize any risk of bad speculation using user values.
>
> So far unused argument registers still leak. To be addressed
> in future patches.
>
> Signed-off-by: Andi Kleen <[email protected]>
> ---
> arch/x86/entry/entry_64.S | 9 +++++++++
> 1 file changed, 9 insertions(+)
>
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index bbdfbdd817d6..632081fd7086 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -236,6 +236,14 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
> pushq %r11 /* pt_regs->r11 */
> sub $(6*8), %rsp
> SAVE_EXTRA_REGS
> + /* Sanitize registers against speculation attacks */
> + /* r10 is cleared later, arguments are handled in san_args* */
> + CLEAR_R11_TO_R15
Don't need to explicitly clear R11 here. It is clobbered with current_task.
> +#ifndef CONFIG_FRAME_POINTER
> + xor %ebp, %ebp
> +#endif
> + xor %ebx, %ebx
> + xor %ecx, %ecx
>
> UNWIND_HINT_REGS extra=0
>
> @@ -263,6 +271,7 @@ entry_SYSCALL_64_fastpath:
> #endif
> ja 1f /* return -ENOSYS (already in pt_regs->ax) */
> movq %r10, %rcx
> + xor %r10, %r10
RCX is already clear, so xchgq %r10, %rcx will be simpler.
--
Brian Gerst
> Well it's kernel hardening to guard against possible future speculation
> attacks. Linus discussed it here for example:
>
> https://www.mail-archive.com/[email protected]/msg1580667.html
>
> For the on/off switch I can add a CONFIG to enable it, even though
> it seems somewhat silly.
I did some micro benchmarking now, sampling different real system
calls.
For the entry code (entry to call) I get on average 62 cycles
for the old code, vs 78 cycles with clear regs and full
stack frame saving on Skylake.
So it's roughly a 20-cycle difference, if we include the restore.
I would conclude 20 cycles are not significant for a syscall,
so there's not a lot of motivation to add a switch
for less security.
-Andi
On Wed, Jan 10, 2018 at 10:35:58PM -0500, Brian Gerst wrote:
> > @@ -263,6 +271,7 @@ entry_SYSCALL_64_fastpath:
> > #endif
> > ja 1f /* return -ENOSYS (already in pt_regs->ax) */
> > movq %r10, %rcx
> > + xor %r10, %r10
>
> RCX is already clear, so xchgq %r10, %rcx will be simpler.
XOR is special cased by the hardware, so it's always more
efficient.
-Andi
>
On Tue, Jan 09, 2018 at 05:03:22PM -0800, Andi Kleen wrote:
> From: Andi Kleen <[email protected]>
>
> Add 64bit assembler macros to clear registers on kernel entry.
> Used in followon patches.
>
> Signed-off-by: Andi Kleen <[email protected]>
> ---
> arch/x86/entry/calling.h | 28 ++++++++++++++++++++++++++++
> 1 file changed, 28 insertions(+)
>
> diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
> index 45a63e00a6af..9444e7623185 100644
> --- a/arch/x86/entry/calling.h
> +++ b/arch/x86/entry/calling.h
> @@ -172,6 +172,34 @@ For 32-bit we have the following conventions - kernel is built with
> .byte 0xf1
> .endm
>
> + .macro CLEAR_R11_TO_R15
> + xorq %r15, %r15
> + xorq %r14, %r14
> + xorq %r13, %r13
> + xorq %r12, %r12
> + xorq %r11, %r11
> + .endm
> +
> + .macro CLEAR_R8_TO_R15
> + CLEAR_R11_TO_R15
> + xorq %r10, %r10
> + xorq %r9, %r9
> + xorq %r8, %r8
> + .endm
> +
> + .macro CLEAR_ALL_REGS
> + CLEAR_R8_TO_R15
> + xorl %eax, %eax
> + xorl %ebx, %ebx
How come you use xorl vs xorq?
> + xorl %ecx, %ecx
> + xorl %edx, %edx
> + xorl %esi, %esi
> + xorl %edi, %edi
> +#ifndef CONFIG_FRAME_POINTER
> + xorl %ebp, %ebp
> +#endif
> + .endm
> +
> /*
> * This is a sneaky trick to help the unwinder find pt_regs on the stack. The
> * frame pointer is replaced with an encoded pointer to pt_regs. The encoding
> --
> 2.14.3
>
> > + .macro CLEAR_ALL_REGS
> > + CLEAR_R8_TO_R15
> > + xorl %eax, %eax
> > + xorl %ebx, %ebx
>
> How come you use xorl vs xorq?
In 64-bit mode a write to a 32-bit register is always zero-extended to
64 bits, so xorl clears the whole register, and it is one byte shorter
because it doesn't need a REX prefix.
-Andi
On Tue, Jan 09, 2018 at 05:03:21PM -0800, Andi Kleen wrote:
> From: Andi Kleen <[email protected]>
>
> Remove the partial stack frame in the 64bit syscall fast path.
> In the next patch we want to clear the extra registers, which requires
> to always save all registers. So remove the partial stack frame
> in the syscall fast path and always save everything.
>
> This actually simplifies the code because the ptregs stubs
> are not needed anymore.
>
> arch/x86/entry/entry_64.S | 57 ++++-----------------------------------------------------
> arch/x86/entry/syscall_64.c | 2 +-
This diffstat doesn't need to be in the changelog.
--
Josh
On Tue, Jan 09, 2018 at 05:03:23PM -0800, Andi Kleen wrote:
> From: Andi Kleen <[email protected]>
>
> We clear all the non argument registers for 64bit SYSCALLs
> to minimize any risk of bad speculation using user values.
>
> So far unused argument registers still leak. To be addressed
> in future patches.
>
> Signed-off-by: Andi Kleen <[email protected]>
> ---
> arch/x86/entry/entry_64.S | 9 +++++++++
> 1 file changed, 9 insertions(+)
>
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index bbdfbdd817d6..632081fd7086 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -236,6 +236,14 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
> pushq %r11 /* pt_regs->r11 */
> sub $(6*8), %rsp
> SAVE_EXTRA_REGS
> + /* Sanitize registers against speculation attacks */
This comment isn't necessary, though it would be good to add comments
above the CLEAR macros themselves explaining why they're needed.
> + /* r10 is cleared later, arguments are handled in san_args* */
What is san_args?
> + CLEAR_R11_TO_R15
> +#ifndef CONFIG_FRAME_POINTER
> + xor %ebp, %ebp
> +#endif
Why is %rbp not cleared with CONFIG_FRAME_POINTER? Is it because it
will get clobbered by the first called function?
> + xor %ebx, %ebx
> + xor %ecx, %ecx
I think clearing %ecx isn't needed, it gets clobbered below for the fast
path, and gets clobbered by do_syscall_64() for the slow path.
>
> UNWIND_HINT_REGS extra=0
>
> @@ -263,6 +271,7 @@ entry_SYSCALL_64_fastpath:
> #endif
> ja 1f /* return -ENOSYS (already in pt_regs->ax) */
> movq %r10, %rcx
> + xor %r10, %r10
>
> #ifdef CONFIG_RETPOLINE
> movq sys_call_table(, %rax, 8), %rax
Now that the fast path is getting slower, I wonder if it still makes
sense to have a "fast path"? It would be good to see measurements
comparing the fast and slow paths.
--
Josh