2022-05-08 19:32:06

by Peter Zijlstra

[permalink] [raw]
Subject: [PATCH 3/6] x86/entry: Use PUSH_AND_CLEAR_REGS for compat

Since the upper regs don't exist for ia32 code, preserving them
doesn't hurt and it simplifies the code.

This doesn't add any attack surface that would not already be
available through INT80.

Notably:

- 32bit SYSENTER: didn't clear si, dx, cx.

- 32bit SYSCALL, INT80: *do* clear si since the C functions don't
take a second argument.

- 64bit: didn't clear si since the C functions take a second
argument; except the error_entry path might have only one argument,
so clearing si was missing here.

32b SYSENTER should be clearing all those 3 registers, nothing uses them
and selftests pass.

Unconditionally clear rsi since it simplifies code.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Borislav Petkov <[email protected]>
---
arch/x86/entry/calling.h | 1
arch/x86/entry/entry_64_compat.S | 87 +--------------------------------------
2 files changed, 4 insertions(+), 84 deletions(-)

--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -99,6 +99,7 @@ For 32-bit we have the following convent
* well before they could be put to use in a speculative execution
* gadget.
*/
+ xorl %esi, %esi /* nospec si */
xorl %edx, %edx /* nospec dx */
xorl %ecx, %ecx /* nospec cx */
xorl %r8d, %r8d /* nospec r8 */
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -83,32 +83,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_af
movl %eax, %eax

pushq %rax /* pt_regs->orig_ax */
- pushq %rdi /* pt_regs->di */
- pushq %rsi /* pt_regs->si */
- pushq %rdx /* pt_regs->dx */
- pushq %rcx /* pt_regs->cx */
- pushq $-ENOSYS /* pt_regs->ax */
- pushq $0 /* pt_regs->r8 = 0 */
- xorl %r8d, %r8d /* nospec r8 */
- pushq $0 /* pt_regs->r9 = 0 */
- xorl %r9d, %r9d /* nospec r9 */
- pushq $0 /* pt_regs->r10 = 0 */
- xorl %r10d, %r10d /* nospec r10 */
- pushq $0 /* pt_regs->r11 = 0 */
- xorl %r11d, %r11d /* nospec r11 */
- pushq %rbx /* pt_regs->rbx */
- xorl %ebx, %ebx /* nospec rbx */
- pushq %rbp /* pt_regs->rbp (will be overwritten) */
- xorl %ebp, %ebp /* nospec rbp */
- pushq $0 /* pt_regs->r12 = 0 */
- xorl %r12d, %r12d /* nospec r12 */
- pushq $0 /* pt_regs->r13 = 0 */
- xorl %r13d, %r13d /* nospec r13 */
- pushq $0 /* pt_regs->r14 = 0 */
- xorl %r14d, %r14d /* nospec r14 */
- pushq $0 /* pt_regs->r15 = 0 */
- xorl %r15d, %r15d /* nospec r15 */
-
+ PUSH_AND_CLEAR_REGS rax=$-ENOSYS
UNWIND_HINT_REGS

cld
@@ -225,35 +200,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_saf
SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
movl %eax, %eax /* discard orig_ax high bits */
pushq %rax /* pt_regs->orig_ax */
- pushq %rdi /* pt_regs->di */
- pushq %rsi /* pt_regs->si */
- xorl %esi, %esi /* nospec si */
- pushq %rdx /* pt_regs->dx */
- xorl %edx, %edx /* nospec dx */
- pushq %rbp /* pt_regs->cx (stashed in bp) */
- xorl %ecx, %ecx /* nospec cx */
- pushq $-ENOSYS /* pt_regs->ax */
- pushq $0 /* pt_regs->r8 = 0 */
- xorl %r8d, %r8d /* nospec r8 */
- pushq $0 /* pt_regs->r9 = 0 */
- xorl %r9d, %r9d /* nospec r9 */
- pushq $0 /* pt_regs->r10 = 0 */
- xorl %r10d, %r10d /* nospec r10 */
- pushq $0 /* pt_regs->r11 = 0 */
- xorl %r11d, %r11d /* nospec r11 */
- pushq %rbx /* pt_regs->rbx */
- xorl %ebx, %ebx /* nospec rbx */
- pushq %rbp /* pt_regs->rbp (will be overwritten) */
- xorl %ebp, %ebp /* nospec rbp */
- pushq $0 /* pt_regs->r12 = 0 */
- xorl %r12d, %r12d /* nospec r12 */
- pushq $0 /* pt_regs->r13 = 0 */
- xorl %r13d, %r13d /* nospec r13 */
- pushq $0 /* pt_regs->r14 = 0 */
- xorl %r14d, %r14d /* nospec r14 */
- pushq $0 /* pt_regs->r15 = 0 */
- xorl %r15d, %r15d /* nospec r15 */
-
+ PUSH_AND_CLEAR_REGS rax=$-ENOSYS
UNWIND_HINT_REGS

movq %rsp, %rdi
@@ -380,35 +327,7 @@ SYM_CODE_START(entry_INT80_compat)
pushq 0*8(%rax) /* regs->orig_ax */
.Lint80_keep_stack:

- pushq %rdi /* pt_regs->di */
- pushq %rsi /* pt_regs->si */
- xorl %esi, %esi /* nospec si */
- pushq %rdx /* pt_regs->dx */
- xorl %edx, %edx /* nospec dx */
- pushq %rcx /* pt_regs->cx */
- xorl %ecx, %ecx /* nospec cx */
- pushq $-ENOSYS /* pt_regs->ax */
- pushq %r8 /* pt_regs->r8 */
- xorl %r8d, %r8d /* nospec r8 */
- pushq %r9 /* pt_regs->r9 */
- xorl %r9d, %r9d /* nospec r9 */
- pushq %r10 /* pt_regs->r10*/
- xorl %r10d, %r10d /* nospec r10 */
- pushq %r11 /* pt_regs->r11 */
- xorl %r11d, %r11d /* nospec r11 */
- pushq %rbx /* pt_regs->rbx */
- xorl %ebx, %ebx /* nospec rbx */
- pushq %rbp /* pt_regs->rbp */
- xorl %ebp, %ebp /* nospec rbp */
- pushq %r12 /* pt_regs->r12 */
- xorl %r12d, %r12d /* nospec r12 */
- pushq %r13 /* pt_regs->r13 */
- xorl %r13d, %r13d /* nospec r13 */
- pushq %r14 /* pt_regs->r14 */
- xorl %r14d, %r14d /* nospec r14 */
- pushq %r15 /* pt_regs->r15 */
- xorl %r15d, %r15d /* nospec r15 */
-
+ PUSH_AND_CLEAR_REGS rax=$-ENOSYS
UNWIND_HINT_REGS

cld




Subject: [tip: x86/asm] x86/entry: Use PUSH_AND_CLEAR_REGS for compat

The following commit has been merged into the x86/asm branch of tip:

Commit-ID: 8c42819b61b8340cff0643e65b5ce6a4144ab155
Gitweb: https://git.kernel.org/tip/8c42819b61b8340cff0643e65b5ce6a4144ab155
Author: Peter Zijlstra <[email protected]>
AuthorDate: Fri, 06 May 2022 14:14:34 +02:00
Committer: Borislav Petkov <[email protected]>
CommitterDate: Fri, 06 May 2022 15:57:02 +02:00

x86/entry: Use PUSH_AND_CLEAR_REGS for compat

Since the upper regs don't exist for ia32 code, preserving them
doesn't hurt and it simplifies the code.

This doesn't add any attack surface that would not already be
available through INT80.

Notably:

- 32bit SYSENTER: didn't clear si, dx, cx.

- 32bit SYSCALL, INT80: *do* clear si since the C functions don't
take a second argument.

- 64bit: didn't clear si since the C functions take a second
argument; except the error_entry path might have only one argument,
so clearing si was missing here.

32b SYSENTER should be clearing all those 3 registers, nothing uses them
and selftests pass.

Unconditionally clear rsi since it simplifies code.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Reviewed-by: Borislav Petkov <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
arch/x86/entry/calling.h | 1 +-
arch/x86/entry/entry_64_compat.S | 87 +------------------------------
2 files changed, 4 insertions(+), 84 deletions(-)

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index a4c061f..debbe94 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -99,6 +99,7 @@ For 32-bit we have the following conventions - kernel is built with
* well before they could be put to use in a speculative execution
* gadget.
*/
+ xorl %esi, %esi /* nospec si */
xorl %edx, %edx /* nospec dx */
xorl %ecx, %ecx /* nospec cx */
xorl %r8d, %r8d /* nospec r8 */
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index d743eaa..ed2be36 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -83,32 +83,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
movl %eax, %eax

pushq %rax /* pt_regs->orig_ax */
- pushq %rdi /* pt_regs->di */
- pushq %rsi /* pt_regs->si */
- pushq %rdx /* pt_regs->dx */
- pushq %rcx /* pt_regs->cx */
- pushq $-ENOSYS /* pt_regs->ax */
- pushq $0 /* pt_regs->r8 = 0 */
- xorl %r8d, %r8d /* nospec r8 */
- pushq $0 /* pt_regs->r9 = 0 */
- xorl %r9d, %r9d /* nospec r9 */
- pushq $0 /* pt_regs->r10 = 0 */
- xorl %r10d, %r10d /* nospec r10 */
- pushq $0 /* pt_regs->r11 = 0 */
- xorl %r11d, %r11d /* nospec r11 */
- pushq %rbx /* pt_regs->rbx */
- xorl %ebx, %ebx /* nospec rbx */
- pushq %rbp /* pt_regs->rbp (will be overwritten) */
- xorl %ebp, %ebp /* nospec rbp */
- pushq $0 /* pt_regs->r12 = 0 */
- xorl %r12d, %r12d /* nospec r12 */
- pushq $0 /* pt_regs->r13 = 0 */
- xorl %r13d, %r13d /* nospec r13 */
- pushq $0 /* pt_regs->r14 = 0 */
- xorl %r14d, %r14d /* nospec r14 */
- pushq $0 /* pt_regs->r15 = 0 */
- xorl %r15d, %r15d /* nospec r15 */
-
+ PUSH_AND_CLEAR_REGS rax=$-ENOSYS
UNWIND_HINT_REGS

cld
@@ -225,35 +200,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL)
SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
movl %eax, %eax /* discard orig_ax high bits */
pushq %rax /* pt_regs->orig_ax */
- pushq %rdi /* pt_regs->di */
- pushq %rsi /* pt_regs->si */
- xorl %esi, %esi /* nospec si */
- pushq %rdx /* pt_regs->dx */
- xorl %edx, %edx /* nospec dx */
- pushq %rbp /* pt_regs->cx (stashed in bp) */
- xorl %ecx, %ecx /* nospec cx */
- pushq $-ENOSYS /* pt_regs->ax */
- pushq $0 /* pt_regs->r8 = 0 */
- xorl %r8d, %r8d /* nospec r8 */
- pushq $0 /* pt_regs->r9 = 0 */
- xorl %r9d, %r9d /* nospec r9 */
- pushq $0 /* pt_regs->r10 = 0 */
- xorl %r10d, %r10d /* nospec r10 */
- pushq $0 /* pt_regs->r11 = 0 */
- xorl %r11d, %r11d /* nospec r11 */
- pushq %rbx /* pt_regs->rbx */
- xorl %ebx, %ebx /* nospec rbx */
- pushq %rbp /* pt_regs->rbp (will be overwritten) */
- xorl %ebp, %ebp /* nospec rbp */
- pushq $0 /* pt_regs->r12 = 0 */
- xorl %r12d, %r12d /* nospec r12 */
- pushq $0 /* pt_regs->r13 = 0 */
- xorl %r13d, %r13d /* nospec r13 */
- pushq $0 /* pt_regs->r14 = 0 */
- xorl %r14d, %r14d /* nospec r14 */
- pushq $0 /* pt_regs->r15 = 0 */
- xorl %r15d, %r15d /* nospec r15 */
-
+ PUSH_AND_CLEAR_REGS rax=$-ENOSYS
UNWIND_HINT_REGS

movq %rsp, %rdi
@@ -380,35 +327,7 @@ SYM_CODE_START(entry_INT80_compat)
pushq 0*8(%rax) /* regs->orig_ax */
.Lint80_keep_stack:

- pushq %rdi /* pt_regs->di */
- pushq %rsi /* pt_regs->si */
- xorl %esi, %esi /* nospec si */
- pushq %rdx /* pt_regs->dx */
- xorl %edx, %edx /* nospec dx */
- pushq %rcx /* pt_regs->cx */
- xorl %ecx, %ecx /* nospec cx */
- pushq $-ENOSYS /* pt_regs->ax */
- pushq %r8 /* pt_regs->r8 */
- xorl %r8d, %r8d /* nospec r8 */
- pushq %r9 /* pt_regs->r9 */
- xorl %r9d, %r9d /* nospec r9 */
- pushq %r10 /* pt_regs->r10*/
- xorl %r10d, %r10d /* nospec r10 */
- pushq %r11 /* pt_regs->r11 */
- xorl %r11d, %r11d /* nospec r11 */
- pushq %rbx /* pt_regs->rbx */
- xorl %ebx, %ebx /* nospec rbx */
- pushq %rbp /* pt_regs->rbp */
- xorl %ebp, %ebp /* nospec rbp */
- pushq %r12 /* pt_regs->r12 */
- xorl %r12d, %r12d /* nospec r12 */
- pushq %r13 /* pt_regs->r13 */
- xorl %r13d, %r13d /* nospec r13 */
- pushq %r14 /* pt_regs->r14 */
- xorl %r14d, %r14d /* nospec r14 */
- pushq %r15 /* pt_regs->r15 */
- xorl %r15d, %r15d /* nospec r15 */
-
+ PUSH_AND_CLEAR_REGS rax=$-ENOSYS
UNWIND_HINT_REGS

cld

2022-05-19 18:47:08

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 3/6] x86/entry: Use PUSH_AND_CLEAR_REGS for compat

On Thu, May 19, 2022 at 09:24:11AM -0700, Guenter Roeck wrote:
> On Fri, May 06, 2022 at 02:14:34PM +0200, Peter Zijlstra wrote:
> > Since the upper regs don't exist for ia32 code, preserving them
> > doesn't hurt and it simplifies the code.
> >
> > This doesn't add any attack surface that would not already be
> > available through INT80.
> >
> > Notably:
> >
> > - 32bit SYSENTER: didn't clear si, dx, cx.
> >
> > - 32bit SYSCALL, INT80: *do* clear si since the C functions don't
> > take a second argument.
> >
> > - 64bit: didn't clear si since the C functions take a second
> > argument; except the error_entry path might have only one argument,
> > so clearing si was missing here.
> >
> > 32b SYSENTER should be clearing all those 3 registers, nothing uses them
> > and selftests pass.
> >
> > Unconditionally clear rsi since it simplifies code.
> >
> > Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> > Reviewed-by: Borislav Petkov <[email protected]>
>
> linux-next (next-20220519) crashes due to this patch when booting
> q35:EPYC-Rome in qemu.

Could you try backing out each of the hunks one at a time? They're all
more or less independent.

My bet, with this being a #PF on an AMD machine, is that it's either the
SI clear or the SYSCALL change.

2022-05-20 02:49:33

by Lai Jiangshan

[permalink] [raw]
Subject: Re: [PATCH 3/6] x86/entry: Use PUSH_AND_CLEAR_REGS for compat

On Fri, May 20, 2022 at 1:35 AM Josh Poimboeuf <[email protected]> wrote:
>
> diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
> index ed2be3615b50..f76e674d22c4 100644
> --- a/arch/x86/entry/entry_64_compat.S
> +++ b/arch/x86/entry/entry_64_compat.S
> @@ -200,7 +200,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL)
> SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
> movl %eax, %eax /* discard orig_ax high bits */
> pushq %rax /* pt_regs->orig_ax */
> - PUSH_AND_CLEAR_REGS rax=$-ENOSYS
> + PUSH_AND_CLEAR_REGS rcx=%rbp rax=$-ENOSYS

A comment is needed here to explain why %rcx is stashed in %rbp.

The code doing the stash in userspace may be in
arch/x86/entry/vdso/vdso32/system_call.S (see SYSCALL_SEQUENCE).

Thanks
Lai

2022-05-20 08:33:27

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH 3/6] x86/entry: Use PUSH_AND_CLEAR_REGS for compat

On Thu, May 19, 2022 at 10:11:31AM -0700, Josh Poimboeuf wrote:
> On Thu, May 19, 2022 at 07:00:09PM +0200, Peter Zijlstra wrote:
> > On Thu, May 19, 2022 at 09:24:11AM -0700, Guenter Roeck wrote:
> > > On Fri, May 06, 2022 at 02:14:34PM +0200, Peter Zijlstra wrote:
> > > > Since the upper regs don't exist for ia32 code, preserving them
> > > > doesn't hurt and it simplifies the code.
> > > >
> > > > This doesn't add any attack surface that would not already be
> > > > available through INT80.
> > > >
> > > > Notably:
> > > >
> > > > - 32bit SYSENTER: didn't clear si, dx, cx.
> > > >
> > > > - 32bit SYSCALL, INT80: *do* clear si since the C functions don't
> > > > take a second argument.
> > > >
> > > > - 64bit: didn't clear si since the C functions take a second
> > > > argument; except the error_entry path might have only one argument,
> > > > so clearing si was missing here.
> > > >
> > > > 32b SYSENTER should be clearing all those 3 registers, nothing uses them
> > > > and selftests pass.
> > > >
> > > > Unconditionally clear rsi since it simplifies code.
> > > >
> > > > Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> > > > Reviewed-by: Borislav Petkov <[email protected]>
> > >
> > > linux-next (next-20220519) crashes due to this patch when booting
> > > q35:EPYC-Rome in qemu.
> >
> > Could you try backing out each of the hunks one at a time? They're all
> > more or less independent.
> >
> > My bet with this being a #PF on an AMD machine, it's either the SI clear
> > or the SYSCALL change.
>
> I think this should fix it:

Actually that one had a horrendous bug :-)

Guenter, can you try this one?

From: Josh Poimboeuf <[email protected]>
Subject: [PATCH] x86/entry: Fix register corruption in compat syscall

A panic was reported in the init process on AMD:

Run /sbin/init as init process
init[1]: segfault at f7fd5ca0 ip 00000000f7f5bbc7 sp 00000000ffa06aa0 error 7 in libc.so[f7f51000+4e000]
Code: 8a 44 24 10 88 41 ff 8b 44 24 10 83 c4 2c 5b 5e 5f 5d c3 53 83 ec 08 8b 5c 24 10 81 fb 00 f0 ff ff 76 0c e8 ba dc ff ff f7 db <89> 18 83 cb ff 83 c4 08 89 d8 5b c3 e8 81 60 ff ff 05 28 84 07 00
Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
CPU: 1 PID: 1 Comm: init Tainted: G W 5.18.0-rc7-next-20220519 #1
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.15.0-0-g2dd4b9b3f840-prebuilt.qemu.org 04/01/2014
Call Trace:
<TASK>
dump_stack_lvl+0x57/0x7d
panic+0x10f/0x28d
do_exit.cold+0x18/0x48
do_group_exit+0x2e/0xb0
get_signal+0xb6d/0xb80
arch_do_signal_or_restart+0x31/0x760
? show_opcodes.cold+0x1c/0x21
? force_sig_fault+0x49/0x70
exit_to_user_mode_prepare+0x131/0x1a0
irqentry_exit_to_user_mode+0x5/0x30
asm_exc_page_fault+0x27/0x30
RIP: 0023:0xf7f5bbc7
Code: 8a 44 24 10 88 41 ff 8b 44 24 10 83 c4 2c 5b 5e 5f 5d c3 53 83 ec 08 8b 5c 24 10 81 fb 00 f0 ff ff 76 0c e8 ba dc ff ff f7 db <89> 18 83 cb ff 83 c4 08 89 d8 5b c3 e8 81 60 ff ff 05 28 84 07 00
RSP: 002b:00000000ffa06aa0 EFLAGS: 00000217
RAX: 00000000f7fd5ca0 RBX: 000000000000000c RCX: 0000000000001000
RDX: 0000000000000001 RSI: 00000000f7fd5b60 RDI: 00000000f7fd5b60
RBP: 00000000f7fd1c1c R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000206 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
</TASK>

The task's CX register got corrupted by commit 8c42819b61b8 ("x86/entry:
Use PUSH_AND_CLEAR_REGS for compat"), which overlooked the fact that
compat SYSCALL had the user's CX value stored in BP.

Before that commit, CX was saved from its stashed value in BP:

pushq %rbp /* pt_regs->cx (stashed in bp) */

But then it got changed to:

pushq %rcx /* pt_regs->cx */

resulting in the wrong value getting saved and later restored back to
the user. Fix it by pushing the correct value again (BP) for regs->cx.

Fixes: 8c42819b61b8 ("x86/entry: Use PUSH_AND_CLEAR_REGS for compat")
Reported-by: Guenter Roeck <[email protected]>
Signed-off-by: Josh Poimboeuf <[email protected]>
---
arch/x86/entry/calling.h | 8 ++++----
arch/x86/entry/entry_64_compat.S | 2 +-
2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index a97cc78ecb92..29b36e9e4e74 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -63,7 +63,7 @@ For 32-bit we have the following conventions - kernel is built with
* for assembly code:
*/

-.macro PUSH_REGS rdx=%rdx rax=%rax save_ret=0
+.macro PUSH_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0
.if \save_ret
pushq %rsi /* pt_regs->si */
movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */
@@ -73,7 +73,7 @@ For 32-bit we have the following conventions - kernel is built with
pushq %rsi /* pt_regs->si */
.endif
pushq \rdx /* pt_regs->dx */
- pushq %rcx /* pt_regs->cx */
+ pushq \rcx /* pt_regs->cx */
pushq \rax /* pt_regs->ax */
pushq %r8 /* pt_regs->r8 */
pushq %r9 /* pt_regs->r9 */
@@ -115,8 +115,8 @@ For 32-bit we have the following conventions - kernel is built with

.endm

-.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
- PUSH_REGS rdx=\rdx, rax=\rax, save_ret=\save_ret
+.macro PUSH_AND_CLEAR_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0
+ PUSH_REGS rdx=\rdx, rcx=\rcx, rax=\rax, save_ret=\save_ret
CLEAR_REGS
.endm

diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index ed2be3615b50..f76e674d22c4 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -200,7 +200,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL)
SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
movl %eax, %eax /* discard orig_ax high bits */
pushq %rax /* pt_regs->orig_ax */
- PUSH_AND_CLEAR_REGS rax=$-ENOSYS
+ PUSH_AND_CLEAR_REGS rcx=%rbp rax=$-ENOSYS
UNWIND_HINT_REGS

movq %rsp, %rdi
--
2.34.3


2022-05-20 15:41:38

by Guenter Roeck

[permalink] [raw]
Subject: Re: [PATCH 3/6] x86/entry: Use PUSH_AND_CLEAR_REGS for compat

On Thu, May 19, 2022 at 10:35:38AM -0700, Josh Poimboeuf wrote:
> On Thu, May 19, 2022 at 10:11:31AM -0700, Josh Poimboeuf wrote:
> > On Thu, May 19, 2022 at 07:00:09PM +0200, Peter Zijlstra wrote:
> > > On Thu, May 19, 2022 at 09:24:11AM -0700, Guenter Roeck wrote:
> > > > On Fri, May 06, 2022 at 02:14:34PM +0200, Peter Zijlstra wrote:
> > > > > Since the upper regs don't exist for ia32 code, preserving them
> > > > > doesn't hurt and it simplifies the code.
> > > > >
> > > > > This doesn't add any attack surface that would not already be
> > > > > available through INT80.
> > > > >
> > > > > Notably:
> > > > >
> > > > > - 32bit SYSENTER: didn't clear si, dx, cx.
> > > > >
> > > > > - 32bit SYSCALL, INT80: *do* clear si since the C functions don't
> > > > > take a second argument.
> > > > >
> > > > > - 64bit: didn't clear si since the C functions take a second
> > > > > argument; except the error_entry path might have only one argument,
> > > > > so clearing si was missing here.
> > > > >
> > > > > 32b SYSENTER should be clearing all those 3 registers, nothing uses them
> > > > > and selftests pass.
> > > > >
> > > > > Unconditionally clear rsi since it simplifies code.
> > > > >
> > > > > Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> > > > > Reviewed-by: Borislav Petkov <[email protected]>
> > > >
> > > > linux-next (next-20220519) crashes due to this patch when booting
> > > > q35:EPYC-Rome in qemu.
> > >
> > > Could you try backing out each of the hunks one at a time? They're all
> > > more or less independent.
> > >
> > > My bet with this being a #PF on an AMD machine, it's either the SI clear
> > > or the SYSCALL change.
> >
> > I think this should fix it:
>
> Actually that one had a horrendous bug :-)
>
> Guenter, can you try this one?
>

This fixes the problem for me.

Tested-by: Guenter Roeck <[email protected]>

Guenter

> From: Josh Poimboeuf <[email protected]>
> Subject: [PATCH] x86/entry: Fix register corruption in compat syscall
>
> A panic was reported in the init process on AMD:
>
> Run /sbin/init as init process
> init[1]: segfault at f7fd5ca0 ip 00000000f7f5bbc7 sp 00000000ffa06aa0 error 7 in libc.so[f7f51000+4e000]
> Code: 8a 44 24 10 88 41 ff 8b 44 24 10 83 c4 2c 5b 5e 5f 5d c3 53 83 ec 08 8b 5c 24 10 81 fb 00 f0 ff ff 76 0c e8 ba dc ff ff f7 db <89> 18 83 cb ff 83 c4 08 89 d8 5b c3 e8 81 60 ff ff 05 28 84 07 00
> Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
> CPU: 1 PID: 1 Comm: init Tainted: G W 5.18.0-rc7-next-20220519 #1
> Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.15.0-0-g2dd4b9b3f840-prebuilt.qemu.org 04/01/2014
> Call Trace:
> <TASK>
> dump_stack_lvl+0x57/0x7d
> panic+0x10f/0x28d
> do_exit.cold+0x18/0x48
> do_group_exit+0x2e/0xb0
> get_signal+0xb6d/0xb80
> arch_do_signal_or_restart+0x31/0x760
> ? show_opcodes.cold+0x1c/0x21
> ? force_sig_fault+0x49/0x70
> exit_to_user_mode_prepare+0x131/0x1a0
> irqentry_exit_to_user_mode+0x5/0x30
> asm_exc_page_fault+0x27/0x30
> RIP: 0023:0xf7f5bbc7
> Code: 8a 44 24 10 88 41 ff 8b 44 24 10 83 c4 2c 5b 5e 5f 5d c3 53 83 ec 08 8b 5c 24 10 81 fb 00 f0 ff ff 76 0c e8 ba dc ff ff f7 db <89> 18 83 cb ff 83 c4 08 89 d8 5b c3 e8 81 60 ff ff 05 28 84 07 00
> RSP: 002b:00000000ffa06aa0 EFLAGS: 00000217
> RAX: 00000000f7fd5ca0 RBX: 000000000000000c RCX: 0000000000001000
> RDX: 0000000000000001 RSI: 00000000f7fd5b60 RDI: 00000000f7fd5b60
> RBP: 00000000f7fd1c1c R08: 0000000000000000 R09: 0000000000000000
> R10: 0000000000000000 R11: 0000000000000206 R12: 0000000000000000
> R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
> </TASK>
>
> The task's CX register got corrupted by commit 8c42819b61b8 ("x86/entry:
> Use PUSH_AND_CLEAR_REGS for compat"), which overlooked the fact that
> compat SYSCALL had the user's CX value stored in BP.
>
> Before that commit, CX was saved from its stashed value in BP:
>
> pushq %rbp /* pt_regs->cx (stashed in bp) */
>
> But then it got changed to:
>
> pushq %rcx /* pt_regs->cx */
>
> resulting in the wrong value getting saved and later restored back to
> the user. Fix it by pushing the correct value again (BP) for regs->cx.
>
> Fixes: 8c42819b61b8 ("x86/entry: Use PUSH_AND_CLEAR_REGS for compat")
> Reported-by: Guenter Roeck <[email protected]>
> Signed-off-by: Josh Poimboeuf <[email protected]>
> ---
> arch/x86/entry/calling.h | 8 ++++----
> arch/x86/entry/entry_64_compat.S | 2 +-
> 2 files changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
> index a97cc78ecb92..29b36e9e4e74 100644
> --- a/arch/x86/entry/calling.h
> +++ b/arch/x86/entry/calling.h
> @@ -63,7 +63,7 @@ For 32-bit we have the following conventions - kernel is built with
> * for assembly code:
> */
>
> -.macro PUSH_REGS rdx=%rdx rax=%rax save_ret=0
> +.macro PUSH_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0
> .if \save_ret
> pushq %rsi /* pt_regs->si */
> movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */
> @@ -73,7 +73,7 @@ For 32-bit we have the following conventions - kernel is built with
> pushq %rsi /* pt_regs->si */
> .endif
> pushq \rdx /* pt_regs->dx */
> - pushq %rcx /* pt_regs->cx */
> + pushq \rcx /* pt_regs->cx */
> pushq \rax /* pt_regs->ax */
> pushq %r8 /* pt_regs->r8 */
> pushq %r9 /* pt_regs->r9 */
> @@ -115,8 +115,8 @@ For 32-bit we have the following conventions - kernel is built with
>
> .endm
>
> -.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
> - PUSH_REGS rdx=\rdx, rax=\rax, save_ret=\save_ret
> +.macro PUSH_AND_CLEAR_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0
> + PUSH_REGS rdx=\rdx, rcx=\rcx, rax=\rax, save_ret=\save_ret
> CLEAR_REGS
> .endm
>
> diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
> index ed2be3615b50..f76e674d22c4 100644
> --- a/arch/x86/entry/entry_64_compat.S
> +++ b/arch/x86/entry/entry_64_compat.S
> @@ -200,7 +200,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL)
> SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
> movl %eax, %eax /* discard orig_ax high bits */
> pushq %rax /* pt_regs->orig_ax */
> - PUSH_AND_CLEAR_REGS rax=$-ENOSYS
> + PUSH_AND_CLEAR_REGS rcx=%rbp rax=$-ENOSYS
> UNWIND_HINT_REGS
>
> movq %rsp, %rdi
> --
> 2.34.3
>

2022-05-21 20:35:43

by Guenter Roeck

[permalink] [raw]
Subject: Re: [PATCH 3/6] x86/entry: Use PUSH_AND_CLEAR_REGS for compat

On Fri, May 06, 2022 at 02:14:34PM +0200, Peter Zijlstra wrote:
> Since the upper regs don't exist for ia32 code, preserving them
> doesn't hurt and it simplifies the code.
>
> This doesn't add any attack surface that would not already be
> available through INT80.
>
> Notably:
>
> - 32bit SYSENTER: didn't clear si, dx, cx.
>
> - 32bit SYSCALL, INT80: *do* clear si since the C functions don't
> take a second argument.
>
> - 64bit: didn't clear si since the C functions take a second
> argument; except the error_entry path might have only one argument,
> so clearing si was missing here.
>
> 32b SYSENTER should be clearing all those 3 registers, nothing uses them
> and selftests pass.
>
> Unconditionally clear rsi since it simplifies code.
>
> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> Reviewed-by: Borislav Petkov <[email protected]>

linux-next (next-20220519) crashes due to this patch when booting
q35:EPYC-Rome in qemu.

[ 20.716975] Run /sbin/init as init process
[ 20.790596] init[1]: segfault at f7fd5ca0 ip 00000000f7f5bbc7 sp 00000000ffa06aa0 error 7 in libc.so[f7f51000+4e000]
[ 20.793487] Code: 8a 44 24 10 88 41 ff 8b 44 24 10 83 c4 2c 5b 5e 5f 5d c3 53 83 ec 08 8b 5c 24 10 81 fb 00 f0 ff ff 76 0c e8 ba dc ff ff f7 db <89> 18 83 cb ff 83 c4 08 89 d8 5b c3 e8 81 60 ff ff 05 28 84 07 00
[ 20.796332] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
[ 20.796621] CPU: 1 PID: 1 Comm: init Tainted: G W 5.18.0-rc7-next-20220519 #1
[ 20.796724] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.15.0-0-g2dd4b9b3f840-prebuilt.qemu.org 04/01/2014
[ 20.796724] Call Trace:
[ 20.796724] <TASK>
[ 20.796724] dump_stack_lvl+0x57/0x7d
[ 20.796724] panic+0x10f/0x28d
[ 20.796724] do_exit.cold+0x18/0x48
[ 20.796724] do_group_exit+0x2e/0xb0
[ 20.796724] get_signal+0xb6d/0xb80
[ 20.796724] arch_do_signal_or_restart+0x31/0x760
[ 20.796724] ? show_opcodes.cold+0x1c/0x21
[ 20.796724] ? force_sig_fault+0x49/0x70
[ 20.796724] exit_to_user_mode_prepare+0x131/0x1a0
[ 20.796724] irqentry_exit_to_user_mode+0x5/0x30
[ 20.796724] asm_exc_page_fault+0x27/0x30
[ 20.796724] RIP: 0023:0xf7f5bbc7
[ 20.796724] Code: 8a 44 24 10 88 41 ff 8b 44 24 10 83 c4 2c 5b 5e 5f 5d c3 53 83 ec 08 8b 5c 24 10 81 fb 00 f0 ff ff 76 0c e8 ba dc ff ff f7 db <89> 18 83 cb ff 83 c4 08 89 d8 5b c3 e8 81 60 ff ff 05 28 84 07 00
[ 20.796724] RSP: 002b:00000000ffa06aa0 EFLAGS: 00000217
[ 20.796724] RAX: 00000000f7fd5ca0 RBX: 000000000000000c RCX: 0000000000001000
[ 20.796724] RDX: 0000000000000001 RSI: 00000000f7fd5b60 RDI: 00000000f7fd5b60
[ 20.796724] RBP: 00000000f7fd1c1c R08: 0000000000000000 R09: 0000000000000000
[ 20.796724] R10: 0000000000000000 R11: 0000000000000206 R12: 0000000000000000
[ 20.796724] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
[ 20.796724] </TASK>
[ 20.796724] Kernel Offset: 0x33000000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff)

Bisect log attached. Reverting the patch fixes the problem.

Guenter

---
# bad: [21498d01d045c5b95b93e0a0625ae965b4330ebe] Add linux-next specific files for 20220519
# good: [42226c989789d8da4af1de0c31070c96726d990c] Linux 5.18-rc7
git bisect start 'HEAD' 'v5.18-rc7'
# good: [00ad3ec718d0a85b8fe6b317f07e585650e05073] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git
git bisect good 00ad3ec718d0a85b8fe6b317f07e585650e05073
# bad: [7bbdec75300e073a8fa14d19409af4b43bbaff17] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git
git bisect bad 7bbdec75300e073a8fa14d19409af4b43bbaff17
# good: [c298441f72cd14bbe74ac49a5c60ecf302cc2f97] Merge branch 'drm-next' of https://gitlab.freedesktop.org/agd5f/linux
git bisect good c298441f72cd14bbe74ac49a5c60ecf302cc2f97
# good: [e261ae308e94dc89db3f473db29662942a4dd532] Merge branch 'for-next' of git://git.kernel.dk/linux-block.git
git bisect good e261ae308e94dc89db3f473db29662942a4dd532
# good: [ba821c4223c38f4ec1cc2c7151c8abd4c70e3178] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/robh/linux.git
git bisect good ba821c4223c38f4ec1cc2c7151c8abd4c70e3178
# good: [2b0b069fc23047b66e1bf6ffd60e7ea5d4e2f484] Merge branch into tip/master: 'smp/core'
git bisect good 2b0b069fc23047b66e1bf6ffd60e7ea5d4e2f484
# bad: [7e2492890410e54a44b5cea9d34ecca45bf74890] Merge branch into tip/master: 'locking/core'
git bisect bad 7e2492890410e54a44b5cea9d34ecca45bf74890
# bad: [9e20f60bad4afb3e1f368e9a61d9813210ce6a29] Merge branch into tip/master: 'x86/cleanups'
git bisect bad 9e20f60bad4afb3e1f368e9a61d9813210ce6a29
# bad: [ab07ef45e638d9fdffbdd2f50521f73096acf2f1] Merge branch into tip/master: 'x86/asm'
git bisect bad ab07ef45e638d9fdffbdd2f50521f73096acf2f1
# good: [81893ca70cddbbce7cde243e0c70de6917b82956] Merge branch into tip/master: 'timers/core'
git bisect good 81893ca70cddbbce7cde243e0c70de6917b82956
# good: [d205222eb6a8e5e70c21200beb81c6e19ec211d6] x86/entry: Simplify entry_INT80_compat()
git bisect good d205222eb6a8e5e70c21200beb81c6e19ec211d6
# bad: [e2ef115813c34ea5380ac5b4879f515070150210] objtool: Fix STACK_FRAME_NON_STANDARD reloc type
git bisect bad e2ef115813c34ea5380ac5b4879f515070150210
# bad: [1b331eeea7b8676fc5dbdf80d0a07e41be226177] x86/entry: Remove skip_r11rcx
git bisect bad 1b331eeea7b8676fc5dbdf80d0a07e41be226177
# bad: [8c42819b61b8340cff0643e65b5ce6a4144ab155] x86/entry: Use PUSH_AND_CLEAR_REGS for compat
git bisect bad 8c42819b61b8340cff0643e65b5ce6a4144ab155
# first bad commit: [8c42819b61b8340cff0643e65b5ce6a4144ab155] x86/entry: Use PUSH_AND_CLEAR_REGS for compat

2022-05-23 05:43:02

by Guenter Roeck

[permalink] [raw]
Subject: Re: [PATCH 3/6] x86/entry: Use PUSH_AND_CLEAR_REGS for compat

On 5/19/22 18:11, Lai Jiangshan wrote:
> On Fri, May 20, 2022 at 1:35 AM Josh Poimboeuf <[email protected]> wrote:
>>
>> diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
>> index ed2be3615b50..f76e674d22c4 100644
>> --- a/arch/x86/entry/entry_64_compat.S
>> +++ b/arch/x86/entry/entry_64_compat.S
>> @@ -200,7 +200,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL)
>> SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
>> movl %eax, %eax /* discard orig_ax high bits */
>> pushq %rax /* pt_regs->orig_ax */
>> - PUSH_AND_CLEAR_REGS rax=$-ENOSYS
>> + PUSH_AND_CLEAR_REGS rcx=%rbp rax=$-ENOSYS
>
> Some comments need to be here to explain why %rcx is stashed in %rbp.
>

I am curious. That comment wasn't needed before the problem fixed here
was introduced. The comment is unrelated to the bug fix. Why is it needed
now, and why would it be a prerequisite for fixing a critical bug?
Shouldn't that comment be added in a separate patch?

Thanks,
Guenter

2022-05-23 06:30:23

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH 3/6] x86/entry: Use PUSH_AND_CLEAR_REGS for compat

On Thu, May 19, 2022 at 07:00:09PM +0200, Peter Zijlstra wrote:
> On Thu, May 19, 2022 at 09:24:11AM -0700, Guenter Roeck wrote:
> > On Fri, May 06, 2022 at 02:14:34PM +0200, Peter Zijlstra wrote:
> > > Since the upper regs don't exist for ia32 code, preserving them
> > > doesn't hurt and it simplifies the code.
> > >
> > > This doesn't add any attack surface that would not already be
> > > available through INT80.
> > >
> > > Notably:
> > >
> > > - 32bit SYSENTER: didn't clear si, dx, cx.
> > >
> > > - 32bit SYSCALL, INT80: *do* clear si since the C functions don't
> > > take a second argument.
> > >
> > > - 64bit: didn't clear si since the C functions take a second
> > > argument; except the error_entry path might have only one argument,
> > > so clearing si was missing here.
> > >
> > > 32b SYSENTER should be clearing all those 3 registers, nothing uses them
> > > and selftests pass.
> > >
> > > Unconditionally clear rsi since it simplifies code.
> > >
> > > Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> > > Reviewed-by: Borislav Petkov <[email protected]>
> >
> > linux-next (next-20220519) crashes due to this patch when booting
> > q35:EPYC-Rome in qemu.
>
> Could you try backing out each of the hunks one at a time? They're all
> more or less independent.
>
> My bet with this being a #PF on an AMD machine, it's either the SI clear
> or the SYSCALL change.

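I think this should fix it:

[Archive note: this is the first attempt. It passes rcx=%rbx in the
entry_64_compat.S hunk below, but compat SYSCALL stashes the user's CX in
%rbp. The revised patch elsewhere in this thread ("Actually that one had a
horrendous bug") uses rcx=%rbp instead.]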

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index a97cc78ecb92..29b36e9e4e74 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -63,7 +63,7 @@ For 32-bit we have the following conventions - kernel is built with
* for assembly code:
*/

-.macro PUSH_REGS rdx=%rdx rax=%rax save_ret=0
+.macro PUSH_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0
.if \save_ret
pushq %rsi /* pt_regs->si */
movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */
@@ -73,7 +73,7 @@ For 32-bit we have the following conventions - kernel is built with
pushq %rsi /* pt_regs->si */
.endif
pushq \rdx /* pt_regs->dx */
- pushq %rcx /* pt_regs->cx */
+ pushq \rcx /* pt_regs->cx */
pushq \rax /* pt_regs->ax */
pushq %r8 /* pt_regs->r8 */
pushq %r9 /* pt_regs->r9 */
@@ -115,8 +115,8 @@ For 32-bit we have the following conventions - kernel is built with

.endm

-.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
- PUSH_REGS rdx=\rdx, rax=\rax, save_ret=\save_ret
+.macro PUSH_AND_CLEAR_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0
+ PUSH_REGS rdx=\rdx, rcx=\rcx, rax=\rax, save_ret=\save_ret
CLEAR_REGS
.endm

diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index ed2be3615b50..2d40dd132442 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -200,7 +200,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL)
SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
movl %eax, %eax /* discard orig_ax high bits */
pushq %rax /* pt_regs->orig_ax */
- PUSH_AND_CLEAR_REGS rax=$-ENOSYS
+ PUSH_AND_CLEAR_REGS rax=$-ENOSYS rcx=%rbp
UNWIND_HINT_REGS

movq %rsp, %rdi

2022-05-23 07:16:48

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH 3/6] x86/entry: Use PUSH_AND_CLEAR_REGS for compat

On Fri, May 20, 2022 at 09:11:55AM +0800, Lai Jiangshan wrote:
> On Fri, May 20, 2022 at 1:35 AM Josh Poimboeuf <[email protected]> wrote:
> >
> > diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
> > index ed2be3615b50..f76e674d22c4 100644
> > --- a/arch/x86/entry/entry_64_compat.S
> > +++ b/arch/x86/entry/entry_64_compat.S
> > @@ -200,7 +200,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL)
> > SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
> > movl %eax, %eax /* discard orig_ax high bits */
> > pushq %rax /* pt_regs->orig_ax */
> > - PUSH_AND_CLEAR_REGS rax=$-ENOSYS
> > + PUSH_AND_CLEAR_REGS rcx=%rbp rax=$-ENOSYS
>
> Some comments need to be here to explain why %rcx is stashed in %rbp.
>
> The code doing the stash in userspace may be in
> arch/x86/entry/vdso/vdso32/system_call.S (see SYSCALL_SEQUENCE)

I do agree a comment would be good, but looking at that maze, I'm not
sure I'm qualified to give it a proper one ;-)

My best theory is: __kernel_vsyscall() stashes CX in BP before SYSCALL
can overwrite it, because SYSCALL uses CX to stash the return address.
And then PUSH_AND_CLEAR_REGS puts the original CX value back in pt_regs,
because CX is (presumably?) a syscall function argument.

My patch description said that CX must have gotten corrupted in user
space, but that's wrong because __kernel_vsyscall() pushes/pops CX
around the SYSCALL.

But alas it's too late to fix the commit log because it's already been
committed and the tip maintainers are getting pull requests ready for
the merge window.

--
Josh