Various flavors of stub_{sigreturn,execve}, six functions in total,
repeat
movq %rax,RAX(%rsp)
RESTORE_EXTRA_REGS
jmp int_ret_from_sys_call
sequence. RESTORE_EXTRA_REGS is six 5-byte insns.
Reuse this code.
Signed-off-by: Denys Vlasenko <[email protected]>
CC: Linus Torvalds <[email protected]>
CC: Steven Rostedt <[email protected]>
CC: Ingo Molnar <[email protected]>
CC: Borislav Petkov <[email protected]>
CC: "H. Peter Anvin" <[email protected]>
CC: Andy Lutomirski <[email protected]>
CC: Oleg Nesterov <[email protected]>
CC: Frederic Weisbecker <[email protected]>
CC: Alexei Starovoitov <[email protected]>
CC: Will Drewry <[email protected]>
CC: Kees Cook <[email protected]>
CC: [email protected]
CC: [email protected]
---
arch/x86/kernel/entry_64.S | 21 ++++++---------------
1 file changed, 6 insertions(+), 15 deletions(-)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 13185c5..765436c 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -423,6 +423,7 @@ ENTRY(stub_execve)
DEFAULT_FRAME 0
SAVE_EXTRA_REGS
call sys_execve
+return_from_stub:
movq %rax,RAX(%rsp)
RESTORE_EXTRA_REGS
jmp int_ret_from_sys_call
@@ -435,9 +436,7 @@ ENTRY(stub_execveat)
DEFAULT_FRAME 0
SAVE_EXTRA_REGS
call sys_execveat
- movq %rax,RAX(%rsp)
- RESTORE_EXTRA_REGS
- jmp int_ret_from_sys_call
+ jmp return_from_stub
CFI_ENDPROC
END(stub_execveat)
@@ -451,9 +450,7 @@ ENTRY(stub_rt_sigreturn)
DEFAULT_FRAME 0
SAVE_EXTRA_REGS
call sys_rt_sigreturn
- movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
- RESTORE_EXTRA_REGS
- jmp int_ret_from_sys_call
+ jmp return_from_stub
CFI_ENDPROC
END(stub_rt_sigreturn)
@@ -464,9 +461,7 @@ ENTRY(stub_x32_rt_sigreturn)
DEFAULT_FRAME 0
SAVE_EXTRA_REGS
call sys32_x32_rt_sigreturn
- movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
- RESTORE_EXTRA_REGS
- jmp int_ret_from_sys_call
+ jmp return_from_stub
CFI_ENDPROC
END(stub_x32_rt_sigreturn)
@@ -476,9 +471,7 @@ ENTRY(stub_x32_execve)
DEFAULT_FRAME 0
SAVE_EXTRA_REGS
call compat_sys_execve
- movq %rax,RAX(%rsp)
- RESTORE_EXTRA_REGS
- jmp int_ret_from_sys_call
+ jmp return_from_stub
CFI_ENDPROC
END(stub_x32_execve)
@@ -488,9 +481,7 @@ ENTRY(stub_x32_execveat)
DEFAULT_FRAME 0
SAVE_EXTRA_REGS
call compat_sys_execveat
- movq %rax,RAX(%rsp)
- RESTORE_EXTRA_REGS
- jmp int_ret_from_sys_call
+ jmp return_from_stub
CFI_ENDPROC
END(stub_x32_execveat)
--
1.8.1.4
Replace "call func; ret" with "jmp func".
Run-tested.
Signed-off-by: Denys Vlasenko <[email protected]>
CC: Linus Torvalds <[email protected]>
CC: Steven Rostedt <[email protected]>
CC: Ingo Molnar <[email protected]>
CC: Borislav Petkov <[email protected]>
CC: "H. Peter Anvin" <[email protected]>
CC: Andy Lutomirski <[email protected]>
CC: Oleg Nesterov <[email protected]>
CC: Frederic Weisbecker <[email protected]>
CC: Alexei Starovoitov <[email protected]>
CC: Will Drewry <[email protected]>
CC: Kees Cook <[email protected]>
CC: [email protected]
CC: [email protected]
---
arch/x86/kernel/entry_64.S | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 765436c..ec51598 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -407,8 +407,7 @@ ENTRY(stub_\func)
CFI_STARTPROC
DEFAULT_FRAME 0, 8 /* offset 8: return address */
SAVE_EXTRA_REGS 8
- call sys_\func
- ret
+ jmp sys_\func
CFI_ENDPROC
END(stub_\func)
.endm
--
1.8.1.4
stub_sigreturn ignores old values of pt_regs->REG for all general-purpose
registers, it sets them to values saved on userspace
signal stack.
Which is hardly surprising - it would be a bug if it would use pt_regs->REG.
sigreturn must restore all registers.
Therefore, SAVE_EXTRA_REGS in it ought to be redundant.
It is a leftover from the time SAVE_EXTRA_REGS wasn't only saving registers,
but it also was extending stack to "full" pt_regs.
Delete this SAVE_EXTRA_REGS.
Run-tested.
Signed-off-by: Denys Vlasenko <[email protected]>
CC: Linus Torvalds <[email protected]>
CC: Steven Rostedt <[email protected]>
CC: Ingo Molnar <[email protected]>
CC: Borislav Petkov <[email protected]>
CC: "H. Peter Anvin" <[email protected]>
CC: Andy Lutomirski <[email protected]>
CC: Oleg Nesterov <[email protected]>
CC: Frederic Weisbecker <[email protected]>
CC: Alexei Starovoitov <[email protected]>
CC: Will Drewry <[email protected]>
CC: Kees Cook <[email protected]>
CC: [email protected]
CC: [email protected]
---
arch/x86/kernel/entry_64.S | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index ec51598..1cf245d 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -447,7 +447,12 @@ ENTRY(stub_rt_sigreturn)
CFI_STARTPROC
addq $8, %rsp
DEFAULT_FRAME 0
- SAVE_EXTRA_REGS
+ /*
+ * Despite RESTORE_EXTRA_REGS in return_from_stub,
+ * no need to SAVE_EXTRA_REGS here:
+ * sys_rt_sigreturn overwrites all general purpose pt_regs->REGs
+ * on stack, for RESTORE_{EXTRA,C}_REGS to pick them up.
+ */
call sys_rt_sigreturn
jmp return_from_stub
CFI_ENDPROC
@@ -458,7 +463,7 @@ ENTRY(stub_x32_rt_sigreturn)
CFI_STARTPROC
addq $8, %rsp
DEFAULT_FRAME 0
- SAVE_EXTRA_REGS
+ /* No need to SAVE_EXTRA_REGS */
call sys32_x32_rt_sigreturn
jmp return_from_stub
CFI_ENDPROC
--
1.8.1.4
This allows to remove "addq $8, %rsp" from five of six execve and sigreturn stubs.
More importantly, it enables next optimization: faster return on execve failure.
Run-tested.
Signed-off-by: Denys Vlasenko <[email protected]>
CC: Linus Torvalds <[email protected]>
CC: Steven Rostedt <[email protected]>
CC: Ingo Molnar <[email protected]>
CC: Borislav Petkov <[email protected]>
CC: "H. Peter Anvin" <[email protected]>
CC: Andy Lutomirski <[email protected]>
CC: Oleg Nesterov <[email protected]>
CC: Frederic Weisbecker <[email protected]>
CC: Alexei Starovoitov <[email protected]>
CC: Will Drewry <[email protected]>
CC: Kees Cook <[email protected]>
CC: [email protected]
CC: [email protected]
---
arch/x86/kernel/entry_64.S | 27 +++++++++++----------------
1 file changed, 11 insertions(+), 16 deletions(-)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 1cf245d..060cb2e 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -418,11 +418,11 @@ END(stub_\func)
ENTRY(stub_execve)
CFI_STARTPROC
- addq $8, %rsp
- DEFAULT_FRAME 0
- SAVE_EXTRA_REGS
+ DEFAULT_FRAME 0, 8
+ SAVE_EXTRA_REGS 8
call sys_execve
return_from_stub:
+ addq $8, %rsp
movq %rax,RAX(%rsp)
RESTORE_EXTRA_REGS
jmp int_ret_from_sys_call
@@ -431,9 +431,8 @@ END(stub_execve)
ENTRY(stub_execveat)
CFI_STARTPROC
- addq $8, %rsp
- DEFAULT_FRAME 0
- SAVE_EXTRA_REGS
+ DEFAULT_FRAME 0, 8
+ SAVE_EXTRA_REGS 8
call sys_execveat
jmp return_from_stub
CFI_ENDPROC
@@ -445,8 +444,7 @@ END(stub_execveat)
*/
ENTRY(stub_rt_sigreturn)
CFI_STARTPROC
- addq $8, %rsp
- DEFAULT_FRAME 0
+ DEFAULT_FRAME 0, 8
/*
* Despite RESTORE_EXTRA_REGS in return_from_stub,
* no need to SAVE_EXTRA_REGS here:
@@ -461,8 +459,7 @@ END(stub_rt_sigreturn)
#ifdef CONFIG_X86_X32_ABI
ENTRY(stub_x32_rt_sigreturn)
CFI_STARTPROC
- addq $8, %rsp
- DEFAULT_FRAME 0
+ DEFAULT_FRAME 0, 8
/* No need to SAVE_EXTRA_REGS */
call sys32_x32_rt_sigreturn
jmp return_from_stub
@@ -471,9 +468,8 @@ END(stub_x32_rt_sigreturn)
ENTRY(stub_x32_execve)
CFI_STARTPROC
- addq $8, %rsp
- DEFAULT_FRAME 0
- SAVE_EXTRA_REGS
+ DEFAULT_FRAME 0, 8
+ SAVE_EXTRA_REGS 8
call compat_sys_execve
jmp return_from_stub
CFI_ENDPROC
@@ -481,9 +477,8 @@ END(stub_x32_execve)
ENTRY(stub_x32_execveat)
CFI_STARTPROC
- addq $8, %rsp
- DEFAULT_FRAME 0
- SAVE_EXTRA_REGS
+ DEFAULT_FRAME 0, 8
+ SAVE_EXTRA_REGS 8
call compat_sys_execveat
jmp return_from_stub
CFI_ENDPROC
--
1.8.1.4
This makes failed execve's faster.
This also enables a future possible optimization: SAVE_EXTRA_REGS can be avoided
in execve's.
Run-tested.
Signed-off-by: Denys Vlasenko <[email protected]>
CC: Linus Torvalds <[email protected]>
CC: Steven Rostedt <[email protected]>
CC: Ingo Molnar <[email protected]>
CC: Borislav Petkov <[email protected]>
CC: "H. Peter Anvin" <[email protected]>
CC: Andy Lutomirski <[email protected]>
CC: Oleg Nesterov <[email protected]>
CC: Frederic Weisbecker <[email protected]>
CC: Alexei Starovoitov <[email protected]>
CC: Will Drewry <[email protected]>
CC: Kees Cook <[email protected]>
CC: [email protected]
CC: [email protected]
---
arch/x86/kernel/entry_64.S | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 060cb2e..e8f2aeb 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -421,6 +421,11 @@ ENTRY(stub_execve)
DEFAULT_FRAME 0, 8
SAVE_EXTRA_REGS 8
call sys_execve
+return_from_exec:
+ testl %eax,%eax
+ jz return_from_stub
+ /* exec failed, can use fast SYSRET code path in this case */
+ ret
return_from_stub:
addq $8, %rsp
movq %rax,RAX(%rsp)
@@ -434,7 +439,7 @@ ENTRY(stub_execveat)
DEFAULT_FRAME 0, 8
SAVE_EXTRA_REGS 8
call sys_execveat
- jmp return_from_stub
+ jmp return_from_exec
CFI_ENDPROC
END(stub_execveat)
@@ -471,7 +476,7 @@ ENTRY(stub_x32_execve)
DEFAULT_FRAME 0, 8
SAVE_EXTRA_REGS 8
call compat_sys_execve
- jmp return_from_stub
+ jmp return_from_exec
CFI_ENDPROC
END(stub_x32_execve)
@@ -480,7 +485,7 @@ ENTRY(stub_x32_execveat)
DEFAULT_FRAME 0, 8
SAVE_EXTRA_REGS 8
call compat_sys_execveat
- jmp return_from_stub
+ jmp return_from_exec
CFI_ENDPROC
END(stub_x32_execveat)
--
1.8.1.4
ret_from_fork can reuse epilogue at return_from_stub.
Run-tested.
Signed-off-by: Denys Vlasenko <[email protected]>
CC: Linus Torvalds <[email protected]>
CC: Steven Rostedt <[email protected]>
CC: Ingo Molnar <[email protected]>
CC: Borislav Petkov <[email protected]>
CC: "H. Peter Anvin" <[email protected]>
CC: Andy Lutomirski <[email protected]>
CC: Oleg Nesterov <[email protected]>
CC: Frederic Weisbecker <[email protected]>
CC: Alexei Starovoitov <[email protected]>
CC: Will Drewry <[email protected]>
CC: Kees Cook <[email protected]>
CC: [email protected]
CC: [email protected]
---
arch/x86/kernel/entry_64.S | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index e8f2aeb..c4304e2 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -428,6 +428,7 @@ return_from_exec:
ret
return_from_stub:
addq $8, %rsp
+save_rax_and_restore_extra:
movq %rax,RAX(%rsp)
RESTORE_EXTRA_REGS
jmp int_ret_from_sys_call
@@ -525,9 +526,8 @@ ENTRY(ret_from_fork)
1:
movq %rbp, %rdi
call *%rbx
- movl $0, RAX(%rsp)
- RESTORE_EXTRA_REGS
- jmp int_ret_from_sys_call
+ xorl %eax, %eax
+ jmp save_rax_and_restore_extra
CFI_ENDPROC
END(ret_from_fork)
--
1.8.1.4
This is not very useful:
jmp label
label:
Removing the jump.
Signed-off-by: Denys Vlasenko <[email protected]>
CC: Linus Torvalds <[email protected]>
CC: Steven Rostedt <[email protected]>
CC: Ingo Molnar <[email protected]>
CC: Borislav Petkov <[email protected]>
CC: "H. Peter Anvin" <[email protected]>
CC: Andy Lutomirski <[email protected]>
CC: Oleg Nesterov <[email protected]>
CC: Frederic Weisbecker <[email protected]>
CC: Alexei Starovoitov <[email protected]>
CC: Will Drewry <[email protected]>
CC: Kees Cook <[email protected]>
CC: [email protected]
CC: [email protected]
---
arch/x86/kernel/entry_64.S | 1 -
1 file changed, 1 deletion(-)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index c4304e2..36a360a 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1435,7 +1435,6 @@ ENTRY(nmi)
/* If it is below the NMI stack, it is a normal NMI */
jb first_nmi
/* Ah, it is within the NMI stack, treat it as nested */
- jmp nested_nmi
CFI_REMEMBER_STATE
--
1.8.1.4
Replace
test
jz 1f
jmp label
1:
with
test
jnz label
Run-tested.
Signed-off-by: Denys Vlasenko <[email protected]>
CC: Linus Torvalds <[email protected]>
CC: Steven Rostedt <[email protected]>
CC: Ingo Molnar <[email protected]>
CC: Borislav Petkov <[email protected]>
CC: "H. Peter Anvin" <[email protected]>
CC: Andy Lutomirski <[email protected]>
CC: Oleg Nesterov <[email protected]>
CC: Frederic Weisbecker <[email protected]>
CC: Alexei Starovoitov <[email protected]>
CC: Will Drewry <[email protected]>
CC: Kees Cook <[email protected]>
CC: [email protected]
CC: [email protected]
---
arch/x86/kernel/entry_64.S | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 36a360a..0124f6f 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -512,18 +512,18 @@ ENTRY(ret_from_fork)
RESTORE_EXTRA_REGS
testl $3,CS(%rsp) # from kernel_thread?
- jz 1f
/*
* By the time we get here, we have no idea whether our pt_regs,
* ti flags, and ti status came from the 64-bit SYSCALL fast path,
* the slow path, or one of the ia32entry paths.
- * Use int_ret_from_sys_call to return, since it can safely handle
+ * Use IRET code path to return, since it can safely handle
* all of the above.
*/
- jmp int_ret_from_sys_call
+ jnz int_ret_from_sys_call
-1:
+ /* We came from from kernel_thread */
+ /* nb: we depend on RESTORE_EXTRA_REGS above */
movq %rbp, %rdi
call *%rbx
xorl %eax, %eax
--
1.8.1.4
It used to be used to check for _TIF_IA32, but the check has been removed.
Remove GET_THREAD_INFO too.
Run-tested.
Signed-off-by: Denys Vlasenko <[email protected]>
CC: Linus Torvalds <[email protected]>
CC: Steven Rostedt <[email protected]>
CC: Ingo Molnar <[email protected]>
CC: Borislav Petkov <[email protected]>
CC: "H. Peter Anvin" <[email protected]>
CC: Andy Lutomirski <[email protected]>
CC: Oleg Nesterov <[email protected]>
CC: Frederic Weisbecker <[email protected]>
CC: Alexei Starovoitov <[email protected]>
CC: Will Drewry <[email protected]>
CC: Kees Cook <[email protected]>
CC: [email protected]
CC: [email protected]
---
arch/x86/kernel/entry_64.S | 2 --
1 file changed, 2 deletions(-)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 0124f6f..7bc097a 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -507,8 +507,6 @@ ENTRY(ret_from_fork)
call schedule_tail # rdi: 'prev' task parameter
- GET_THREAD_INFO(%rcx)
-
RESTORE_EXTRA_REGS
testl $3,CS(%rsp) # from kernel_thread?
--
1.8.1.4
On Thu, Apr 2, 2015 at 10:36 AM, Denys Vlasenko <[email protected]> wrote:
> stub_sigreturn ignores old values of pt_regs->REG for all general-purpose
> registers, it sets them to values saved on userspace
> signal stack.
>
> Which is hardly surprising - it would be a bug if it would use pt_regs->REG.
> sigreturn must restore all registers.
>
> Therefore, SAVE_EXTRA_REGS in it ought to be redundant.
>
> It is a leftover from the time SAVE_EXTRA_REGS wasn't only saving registers,
> but it also was extending stack to "full" pt_regs.
>
> Delete this SAVE_EXTRA_REGS.
>
> Run-tested.
>
> Signed-off-by: Denys Vlasenko <[email protected]>
> CC: Linus Torvalds <[email protected]>
> CC: Steven Rostedt <[email protected]>
> CC: Ingo Molnar <[email protected]>
> CC: Borislav Petkov <[email protected]>
> CC: "H. Peter Anvin" <[email protected]>
> CC: Andy Lutomirski <[email protected]>
> CC: Oleg Nesterov <[email protected]>
> CC: Frederic Weisbecker <[email protected]>
> CC: Alexei Starovoitov <[email protected]>
> CC: Will Drewry <[email protected]>
> CC: Kees Cook <[email protected]>
> CC: [email protected]
> CC: [email protected]
> ---
> arch/x86/kernel/entry_64.S | 9 +++++++--
> 1 file changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
> index ec51598..1cf245d 100644
> --- a/arch/x86/kernel/entry_64.S
> +++ b/arch/x86/kernel/entry_64.S
> @@ -447,7 +447,12 @@ ENTRY(stub_rt_sigreturn)
> CFI_STARTPROC
> addq $8, %rsp
> DEFAULT_FRAME 0
> - SAVE_EXTRA_REGS
> + /*
> + * Despite RESTORE_EXTRA_REGS in return_from_stub,
> + * no need to SAVE_EXTRA_REGS here:
> + * sys_rt_sigreturn overwrites all general purpose pt_regs->REGs
> + * on stack, for RESTORE_{EXTRA,C}_REGS to pick them up.
> + */
> call sys_rt_sigreturn
> jmp return_from_stub
> CFI_ENDPROC
> @@ -458,7 +463,7 @@ ENTRY(stub_x32_rt_sigreturn)
> CFI_STARTPROC
> addq $8, %rsp
> DEFAULT_FRAME 0
> - SAVE_EXTRA_REGS
> + /* No need to SAVE_EXTRA_REGS */
> call sys32_x32_rt_sigreturn
> jmp return_from_stub
> CFI_ENDPROC
I had the same idea, but determined sigreturn can fault and return an
error code without modifying all the registers. This would leak junk
from the stack.
Also, the VDSO doesn't handle this (unlikely under normal
circumstances) case. It assumes the syscall will not return there and
will go off into the weeds.
--
Brian Gerst
On 04/02/2015 05:01 PM, Brian Gerst wrote:
> On Thu, Apr 2, 2015 at 10:36 AM, Denys Vlasenko <[email protected]> wrote:
>> stub_sigreturn ignores old values of pt_regs->REG for all general-purpose
>> registers, it sets them to values saved on userspace
>> signal stack.
>>
>> Which is hardly surprising - it would be a bug if it would use pt_regs->REG.
>> sigreturn must restore all registers.
>>
>> Therefore, SAVE_EXTRA_REGS in it ought to be redundant.
>>
>> It is a leftover from the time SAVE_EXTRA_REGS wasn't only saving registers,
>> but it also was extending stack to "full" pt_regs.
>>
>> Delete this SAVE_EXTRA_REGS.
>>
>> Run-tested.
>>
>> Signed-off-by: Denys Vlasenko <[email protected]>
>> CC: Linus Torvalds <[email protected]>
>> CC: Steven Rostedt <[email protected]>
>> CC: Ingo Molnar <[email protected]>
>> CC: Borislav Petkov <[email protected]>
>> CC: "H. Peter Anvin" <[email protected]>
>> CC: Andy Lutomirski <[email protected]>
>> CC: Oleg Nesterov <[email protected]>
>> CC: Frederic Weisbecker <[email protected]>
>> CC: Alexei Starovoitov <[email protected]>
>> CC: Will Drewry <[email protected]>
>> CC: Kees Cook <[email protected]>
>> CC: [email protected]
>> CC: [email protected]
>> ---
>> arch/x86/kernel/entry_64.S | 9 +++++++--
>> 1 file changed, 7 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
>> index ec51598..1cf245d 100644
>> --- a/arch/x86/kernel/entry_64.S
>> +++ b/arch/x86/kernel/entry_64.S
>> @@ -447,7 +447,12 @@ ENTRY(stub_rt_sigreturn)
>> CFI_STARTPROC
>> addq $8, %rsp
>> DEFAULT_FRAME 0
>> - SAVE_EXTRA_REGS
>> + /*
>> + * Despite RESTORE_EXTRA_REGS in return_from_stub,
>> + * no need to SAVE_EXTRA_REGS here:
>> + * sys_rt_sigreturn overwrites all general purpose pt_regs->REGs
>> + * on stack, for RESTORE_{EXTRA,C}_REGS to pick them up.
>> + */
>> call sys_rt_sigreturn
>> jmp return_from_stub
>> CFI_ENDPROC
>> @@ -458,7 +463,7 @@ ENTRY(stub_x32_rt_sigreturn)
>> CFI_STARTPROC
>> addq $8, %rsp
>> DEFAULT_FRAME 0
>> - SAVE_EXTRA_REGS
>> + /* No need to SAVE_EXTRA_REGS */
>> call sys32_x32_rt_sigreturn
>> jmp return_from_stub
>> CFI_ENDPROC
>
> I had the same idea, but determined sigreturn can fault and return an
> error code without modifying all the registers. This would leak junk
> from the stack.
This still can be made to work by not RESTORE'ing EXTRA_REGS either,
if there is a way to detect the failure:
call sys_rt_sigreturn
- jmp return_from_stub
+ testl ???????????
+ jz return_from_stub
+ ret
CFI_ENDPROC
But this is not a normal syscall, off-hand I don't see an easy way
to do the test. sys_rt_sigreturn() on failure runs this code:
...
segfault:
force_sig(SIGSEGV, current);
return 0;
}
Help?
On Thu, Apr 2, 2015 at 7:36 AM, Denys Vlasenko <[email protected]> wrote:
> It used to be used to check for _TIF_IA32, but the check has been removed.
>
> Remove GET_THREAD_INFO too.
>
> Run-tested.
Acked-by: Andy Lutomirski <[email protected]>
Anyone know what the eflags reset code just above it is for? Is it
because we used to allow NT in kernel eflags?
--Andy
>
> Signed-off-by: Denys Vlasenko <[email protected]>
> CC: Linus Torvalds <[email protected]>
> CC: Steven Rostedt <[email protected]>
> CC: Ingo Molnar <[email protected]>
> CC: Borislav Petkov <[email protected]>
> CC: "H. Peter Anvin" <[email protected]>
> CC: Andy Lutomirski <[email protected]>
> CC: Oleg Nesterov <[email protected]>
> CC: Frederic Weisbecker <[email protected]>
> CC: Alexei Starovoitov <[email protected]>
> CC: Will Drewry <[email protected]>
> CC: Kees Cook <[email protected]>
> CC: [email protected]
> CC: [email protected]
> ---
> arch/x86/kernel/entry_64.S | 2 --
> 1 file changed, 2 deletions(-)
>
> diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
> index 0124f6f..7bc097a 100644
> --- a/arch/x86/kernel/entry_64.S
> +++ b/arch/x86/kernel/entry_64.S
> @@ -507,8 +507,6 @@ ENTRY(ret_from_fork)
>
> call schedule_tail # rdi: 'prev' task parameter
>
> - GET_THREAD_INFO(%rcx)
> -
> RESTORE_EXTRA_REGS
>
> testl $3,CS(%rsp) # from kernel_thread?
> --
> 1.8.1.4
>
--
Andy Lutomirski
AMA Capital Management, LLC
On Thu, Apr 2, 2015 at 11:20 AM, Denys Vlasenko <[email protected]> wrote:
> On 04/02/2015 05:01 PM, Brian Gerst wrote:
>> On Thu, Apr 2, 2015 at 10:36 AM, Denys Vlasenko <[email protected]> wrote:
>>> stub_sigreturn ignores old values of pt_regs->REG for all general-purpose
>>> registers, it sets them to values saved on userspace
>>> signal stack.
>>>
>>> Which is hardly surprising - it would be a bug if it would use pt_regs->REG.
>>> sigreturn must restore all registers.
>>>
>>> Therefore, SAVE_EXTRA_REGS in it ought to be redundant.
>>>
>>> It is a leftover from the time SAVE_EXTRA_REGS wasn't only saving registers,
>>> but it also was extending stack to "full" pt_regs.
>>>
>>> Delete this SAVE_EXTRA_REGS.
>>>
>>> Run-tested.
>>>
>>> Signed-off-by: Denys Vlasenko <[email protected]>
>>> CC: Linus Torvalds <[email protected]>
>>> CC: Steven Rostedt <[email protected]>
>>> CC: Ingo Molnar <[email protected]>
>>> CC: Borislav Petkov <[email protected]>
>>> CC: "H. Peter Anvin" <[email protected]>
>>> CC: Andy Lutomirski <[email protected]>
>>> CC: Oleg Nesterov <[email protected]>
>>> CC: Frederic Weisbecker <[email protected]>
>>> CC: Alexei Starovoitov <[email protected]>
>>> CC: Will Drewry <[email protected]>
>>> CC: Kees Cook <[email protected]>
>>> CC: [email protected]
>>> CC: [email protected]
>>> ---
>>> arch/x86/kernel/entry_64.S | 9 +++++++--
>>> 1 file changed, 7 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
>>> index ec51598..1cf245d 100644
>>> --- a/arch/x86/kernel/entry_64.S
>>> +++ b/arch/x86/kernel/entry_64.S
>>> @@ -447,7 +447,12 @@ ENTRY(stub_rt_sigreturn)
>>> CFI_STARTPROC
>>> addq $8, %rsp
>>> DEFAULT_FRAME 0
>>> - SAVE_EXTRA_REGS
>>> + /*
>>> + * Despite RESTORE_EXTRA_REGS in return_from_stub,
>>> + * no need to SAVE_EXTRA_REGS here:
>>> + * sys_rt_sigreturn overwrites all general purpose pt_regs->REGs
>>> + * on stack, for RESTORE_{EXTRA,C}_REGS to pick them up.
>>> + */
>>> call sys_rt_sigreturn
>>> jmp return_from_stub
>>> CFI_ENDPROC
>>> @@ -458,7 +463,7 @@ ENTRY(stub_x32_rt_sigreturn)
>>> CFI_STARTPROC
>>> addq $8, %rsp
>>> DEFAULT_FRAME 0
>>> - SAVE_EXTRA_REGS
>>> + /* No need to SAVE_EXTRA_REGS */
>>> call sys32_x32_rt_sigreturn
>>> jmp return_from_stub
>>> CFI_ENDPROC
>>
>> I had the same idea, but determined sigreturn can fault and return an
>> error code without modifying all the registers. This would leak junk
>> from the stack.
To clarify, I remembered looking at sigreturn possibly faulting from
the 32-bit perspective, where the 6th arg is read from the user stack
and a fault there would return -EFAULT, for any syscall.
> This still can be made to work by not RESTORE'ing EXTRA_REGS either,
> if there is a way to detect the failure:
>
> call sys_rt_sigreturn
> - jmp return_from_stub
> + testl ???????????
> + jz return_from_stub
> + ret
> CFI_ENDPROC
>
> But this is not a normal syscall, off-hand I don't see an easy way
> to do the test. sys_rt_sigreturn() on failure runs this code:
>
> ...
> segfault:
> force_sig(SIGSEGV, current);
> return 0;
> }
>
> Help?
I don't think you can test the return value, because in the success
case it can be any value (the restored RAX value).
--
Brian Gerst
On 04/02/2015 09:10 PM, Brian Gerst wrote:
> On Thu, Apr 2, 2015 at 11:20 AM, Denys Vlasenko <[email protected]> wrote:
>> On 04/02/2015 05:01 PM, Brian Gerst wrote:
>>> On Thu, Apr 2, 2015 at 10:36 AM, Denys Vlasenko <[email protected]> wrote:
>>>> stub_sigreturn ignores old values of pt_regs->REG for all general-purpose
>>>> registers, it sets them to values saved on userspace
>>>> signal stack.
>>>>
>>>> Which is hardly surprising - it would be a bug if it would use pt_regs->REG.
>>>> sigreturn must restore all registers.
>>>>
>>>> Therefore, SAVE_EXTRA_REGS in it ought to be redundant.
>>>>
>>>> It is a leftover from the time SAVE_EXTRA_REGS wasn't only saving registers,
>>>> but it also was extending stack to "full" pt_regs.
>>>>
>>>> Delete this SAVE_EXTRA_REGS.
>>>>
>>>> Run-tested.
>>>>
>>>> Signed-off-by: Denys Vlasenko <[email protected]>
>>>> CC: Linus Torvalds <[email protected]>
>>>> CC: Steven Rostedt <[email protected]>
>>>> CC: Ingo Molnar <[email protected]>
>>>> CC: Borislav Petkov <[email protected]>
>>>> CC: "H. Peter Anvin" <[email protected]>
>>>> CC: Andy Lutomirski <[email protected]>
>>>> CC: Oleg Nesterov <[email protected]>
>>>> CC: Frederic Weisbecker <[email protected]>
>>>> CC: Alexei Starovoitov <[email protected]>
>>>> CC: Will Drewry <[email protected]>
>>>> CC: Kees Cook <[email protected]>
>>>> CC: [email protected]
>>>> CC: [email protected]
>>>> ---
>>>> arch/x86/kernel/entry_64.S | 9 +++++++--
>>>> 1 file changed, 7 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
>>>> index ec51598..1cf245d 100644
>>>> --- a/arch/x86/kernel/entry_64.S
>>>> +++ b/arch/x86/kernel/entry_64.S
>>>> @@ -447,7 +447,12 @@ ENTRY(stub_rt_sigreturn)
>>>> CFI_STARTPROC
>>>> addq $8, %rsp
>>>> DEFAULT_FRAME 0
>>>> - SAVE_EXTRA_REGS
>>>> + /*
>>>> + * Despite RESTORE_EXTRA_REGS in return_from_stub,
>>>> + * no need to SAVE_EXTRA_REGS here:
>>>> + * sys_rt_sigreturn overwrites all general purpose pt_regs->REGs
>>>> + * on stack, for RESTORE_{EXTRA,C}_REGS to pick them up.
>>>> + */
>>>> call sys_rt_sigreturn
>>>> jmp return_from_stub
>>>> CFI_ENDPROC
>>>> @@ -458,7 +463,7 @@ ENTRY(stub_x32_rt_sigreturn)
>>>> CFI_STARTPROC
>>>> addq $8, %rsp
>>>> DEFAULT_FRAME 0
>>>> - SAVE_EXTRA_REGS
>>>> + /* No need to SAVE_EXTRA_REGS */
>>>> call sys32_x32_rt_sigreturn
>>>> jmp return_from_stub
>>>> CFI_ENDPROC
>>>
>>> I had the same idea, but determined sigreturn can fault and return an
>>> error code without modifying all the registers. This would leak junk
>>> from the stack.
>
> To clarify, I remembered looking at sigreturn possibly faulting from
> the 32-bit perspective, where the 6th arg is read from the user stack
> and a fault there would return -EFAULT, for any syscall.
>
>> This still can be made to work by not RESTORE'ing EXTRA_REGS either,
>> if there is a way to detect the failure:
>>
>> call sys_rt_sigreturn
>> - jmp return_from_stub
>> + testl ???????????
>> + jz return_from_stub
>> + ret
>> CFI_ENDPROC
>>
>> But this is not a normal syscall, off-hand I don't see an easy way
>> to do the test. sys_rt_sigreturn() on failure runs this code:
>>
>> ...
>> segfault:
>> force_sig(SIGSEGV, current);
>> return 0;
>> }
>>
>> Help?
>
> I don't think you can test the return value, because in the success
> case it can be any value (the restored RAX value).
Yeah. I think the "optimize out SAVE_EXTRA_REGS on sigreturn" idea
didn't play out.
On Thu, Apr 2, 2015 at 12:10 PM, Brian Gerst <[email protected]> wrote:
> On Thu, Apr 2, 2015 at 11:20 AM, Denys Vlasenko <[email protected]> wrote:
>> On 04/02/2015 05:01 PM, Brian Gerst wrote:
>>> On Thu, Apr 2, 2015 at 10:36 AM, Denys Vlasenko <[email protected]> wrote:
>>>> stub_sigreturn ignores old values of pt_regs->REG for all general-purpose
>>>> registers, it sets them to values saved on userspace
>>>> signal stack.
>>>>
>>>> Which is hardly surprising - it would be a bug if it would use pt_regs->REG.
>>>> sigreturn must restore all registers.
>>>>
>>>> Therefore, SAVE_EXTRA_REGS in it ought to be redundant.
>>>>
>>>> It is a leftover from the time SAVE_EXTRA_REGS wasn't only saving registers,
>>>> but it also was extending stack to "full" pt_regs.
>>>>
>>>> Delete this SAVE_EXTRA_REGS.
>>>>
>>>> Run-tested.
>>>>
>>>> Signed-off-by: Denys Vlasenko <[email protected]>
>>>> CC: Linus Torvalds <[email protected]>
>>>> CC: Steven Rostedt <[email protected]>
>>>> CC: Ingo Molnar <[email protected]>
>>>> CC: Borislav Petkov <[email protected]>
>>>> CC: "H. Peter Anvin" <[email protected]>
>>>> CC: Andy Lutomirski <[email protected]>
>>>> CC: Oleg Nesterov <[email protected]>
>>>> CC: Frederic Weisbecker <[email protected]>
>>>> CC: Alexei Starovoitov <[email protected]>
>>>> CC: Will Drewry <[email protected]>
>>>> CC: Kees Cook <[email protected]>
>>>> CC: [email protected]
>>>> CC: [email protected]
>>>> ---
>>>> arch/x86/kernel/entry_64.S | 9 +++++++--
>>>> 1 file changed, 7 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
>>>> index ec51598..1cf245d 100644
>>>> --- a/arch/x86/kernel/entry_64.S
>>>> +++ b/arch/x86/kernel/entry_64.S
>>>> @@ -447,7 +447,12 @@ ENTRY(stub_rt_sigreturn)
>>>> CFI_STARTPROC
>>>> addq $8, %rsp
>>>> DEFAULT_FRAME 0
>>>> - SAVE_EXTRA_REGS
>>>> + /*
>>>> + * Despite RESTORE_EXTRA_REGS in return_from_stub,
>>>> + * no need to SAVE_EXTRA_REGS here:
>>>> + * sys_rt_sigreturn overwrites all general purpose pt_regs->REGs
>>>> + * on stack, for RESTORE_{EXTRA,C}_REGS to pick them up.
>>>> + */
>>>> call sys_rt_sigreturn
>>>> jmp return_from_stub
>>>> CFI_ENDPROC
>>>> @@ -458,7 +463,7 @@ ENTRY(stub_x32_rt_sigreturn)
>>>> CFI_STARTPROC
>>>> addq $8, %rsp
>>>> DEFAULT_FRAME 0
>>>> - SAVE_EXTRA_REGS
>>>> + /* No need to SAVE_EXTRA_REGS */
>>>> call sys32_x32_rt_sigreturn
>>>> jmp return_from_stub
>>>> CFI_ENDPROC
>>>
>>> I had the same idea, but determined sigreturn can fault and return an
>>> error code without modifying all the registers. This would leak junk
>>> from the stack.
>
> To clarify, I remembered looking at sigreturn possibly faulting from
> the 32-bit perspective, where the 6th arg is read from the user stack
> and a fault there would return -EFAULT, for any syscall.
>
>> This still can be made to work by not RESTORE'ing EXTRA_REGS either,
>> if there is a way to detect the failure:
>>
>> call sys_rt_sigreturn
>> - jmp return_from_stub
>> + testl ???????????
>> + jz return_from_stub
>> + ret
>> CFI_ENDPROC
>>
>> But this is not a normal syscall, off-hand I don't see an easy way
>> to do the test. sys_rt_sigreturn() on failure runs this code:
>>
>> ...
>> segfault:
>> force_sig(SIGSEGV, current);
>> return 0;
>> }
>>
>> Help?
>
> I don't think you can test the return value, because in the success
> case it can be any value (the restored RAX value).
Given that getting this right is complicated and sigreturn is already
really slow, I'm not convinced that this particular optimization is
worthwhile.
--Andy