This short-circuit path in sysret_signal looks wrong to me.
AFAICT, in practice the branch is never taken--and if it were,
it would go wrong. To wit, try loading a module whose init
function does set_thread_flag(TIF_IRET), and see insmod crash
(presumably with a wrong user stack pointer).
This is because the FIXUP_TOP_OF_STACK work hasn't been done yet
when we jump around the call to ptregscall_common and get to
int_with_check--where it expects the user RSP,SS,CS and EFLAGS to
have been stored by FIXUP_TOP_OF_STACK.
I don't think it's normally possible to get to sysret_signal with no
_TIF_DO_NOTIFY_MASK bits set anyway, so these two instructions are
already superfluous. If it ever did happen, it would be harmless to call
do_notify_resume with nothing for it to do.
Signed-off-by: Roland McGrath <[email protected]>
---
arch/x86/kernel/entry_64.S | 6 +-----
1 files changed, 1 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 556a8df..f9c859d 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -296,16 +296,12 @@ sysret_careful:
sysret_signal:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
- testl $_TIF_DO_NOTIFY_MASK,%edx
- jz 1f
-
- /* Really a signal */
/* edx: work flags (arg3) */
leaq do_notify_resume(%rip),%rax
leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
xorl %esi,%esi # oldset -> arg2
call ptregscall_common
-1: movl $_TIF_NEED_RESCHED,%edi
+ movl $_TIF_NEED_RESCHED,%edi
/* Use IRET because user could have changed frame. This
works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
DISABLE_INTERRUPTS(CLBR_NONE)
This adds a fast path for 64-bit syscall entry and exit when
TIF_SYSCALL_AUDIT is set, but no other kind of syscall tracing.
This path does not need to save and restore all registers as
the general case of tracing does. Avoiding the iret return path
when syscall audit is enabled helps performance a lot.
Signed-off-by: Roland McGrath <[email protected]>
---
arch/x86/kernel/entry_64.S | 42 ++++++++++++++++++++++++++++++++++++++++++
kernel/auditsc.c | 3 ++-
2 files changed, 44 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index f9c859d..9b152d5 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -52,6 +52,12 @@
#include <asm/irqflags.h>
#include <asm/paravirt.h>
+/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
+#include <linux/elf-em.h>
+#define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
+#define __AUDIT_ARCH_64BIT 0x80000000
+#define __AUDIT_ARCH_LE 0x40000000
+
.code64
#ifndef CONFIG_PREEMPT
@@ -246,6 +252,7 @@ ENTRY(system_call_after_swapgs)
GET_THREAD_INFO(%rcx)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
jnz tracesys
+system_call_fastpath:
cmpq $__NR_syscall_max,%rax
ja badsys
movq %r10,%rcx
@@ -296,6 +303,8 @@ sysret_careful:
sysret_signal:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
+ bt $TIF_SYSCALL_AUDIT,%edx
+ jc sysret_audit
/* edx: work flags (arg3) */
leaq do_notify_resume(%rip),%rax
leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
@@ -312,8 +321,41 @@ badsys:
movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
jmp ret_from_sys_call
+ /*
+ * Fast path for syscall audit without full syscall trace.
+ * We just call audit_syscall_entry() directly, and then
+ * jump back to the normal fast path.
+ */
+auditsys:
+ movq %r10,%r9 /* 6th arg: 4th syscall arg */
+ movq %rdx,%r8 /* 5th arg: 3rd syscall arg */
+ movq %rsi,%rcx /* 4th arg: 2nd syscall arg */
+ movq %rdi,%rdx /* 3rd arg: 1st syscall arg */
+ movq %rax,%rsi /* 2nd arg: syscall number */
+ movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */
+ call audit_syscall_entry
+ LOAD_ARGS 0 /* reload call-clobbered registers */
+ jmp system_call_fastpath
+
+ /*
+ * Return fast path for syscall audit. Call audit_syscall_exit()
+ * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
+ * masked off.
+ */
+sysret_audit:
+ movq %rax,%rsi /* second arg, syscall return value */
+ cmpq $0,%rax /* is it < 0? */
+ setl %al /* 1 if so, 0 if not */
+ movzbl %al,%edi /* zero-extend that into %edi */
+ inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
+ call audit_syscall_exit
+ movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
+ jmp sysret_check
+
/* Do syscall tracing */
tracesys:
+ testl $(_TIF_SYSCALL_TRACE|_TIF_SECCOMP),threadinfo_flags(%rcx)
+ jz auditsys
SAVE_REST
movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
FIXUP_TOP_OF_STACK %rdi
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index c10e7aa..4699950 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1476,7 +1476,8 @@ void audit_syscall_entry(int arch, int major,
struct audit_context *context = tsk->audit_context;
enum audit_state state;
- BUG_ON(!context);
+ if (unlikely(!context))
+ return;
/*
* This happens only on certain architectures that make system
This adds fast paths for 32-bit syscall entry and exit when
TIF_SYSCALL_AUDIT is set, but no other kind of syscall tracing.
These paths do not need to save and restore all registers as
the general case of tracing does. Avoiding the iret return path
when syscall audit is enabled helps performance a lot.
Signed-off-by: Roland McGrath <[email protected]>
---
arch/x86/ia32/ia32entry.S | 78 +++++++++++++++++++++++++++++++++++++++++---
arch/x86/kernel/entry_64.S | 1 +
2 files changed, 74 insertions(+), 5 deletions(-)
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index b5e329d..f38a354 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -15,6 +15,11 @@
#include <asm/irqflags.h>
#include <linux/linkage.h>
+/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
+#include <linux/elf-em.h>
+#define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE)
+#define __AUDIT_ARCH_LE 0x40000000
+
#define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
.macro IA32_ARG_FIXUP noebp=0
@@ -131,13 +136,17 @@ sysenter_do_call:
cmpl $(IA32_NR_syscalls-1),%eax
ja ia32_badsys
IA32_ARG_FIXUP 1
+sysenter_dispatch:
call *ia32_sys_call_table(,%rax,8)
movq %rax,RAX-ARGOFFSET(%rsp)
GET_THREAD_INFO(%r10)
cli
TRACE_IRQS_OFF
- testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
+ testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),threadinfo_flags(%r10)
jnz int_ret_from_sys_call
+ bt $TIF_SYSCALL_AUDIT,threadinfo_flags(%r10)
+ jc sysexit_audit
+sysexit_from_sys_call:
andl $~TS_COMPAT,threadinfo_status(%r10)
/* clear IF, that popfq doesn't enable interrupts early */
andl $~0x200,EFLAGS-R11(%rsp)
@@ -156,9 +165,56 @@ sysenter_do_call:
/* sysexit */
.byte 0xf, 0x35
-sysenter_tracesys:
+ .macro auditsys_entry dispatch
+ movl %esi,%r9d /* 6th arg: 4th syscall arg */
+ movl %edx,%r8d /* 5th arg: 3rd syscall arg */
+ /* (already in %ecx) 4th arg: 2nd syscall arg */
+ movl %ebx,%edx /* 3rd arg: 1st syscall arg */
+ movl %eax,%esi /* 2nd arg: syscall number */
+ movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */
+ call audit_syscall_entry
+ movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */
+ cmpl $(IA32_NR_syscalls-1),%eax
+ ja ia32_badsys
+ movl %ebx,%edi /* reload 1st syscall arg */
+ movl RCX-ARGOFFSET(%rsp),%esi /* reload 2nd syscall arg */
+ movl RDX-ARGOFFSET(%rsp),%edx /* reload 3rd syscall arg */
+ movl RSI-ARGOFFSET(%rsp),%ecx /* reload 4th syscall arg */
+ movl RDI-ARGOFFSET(%rsp),%r8d /* reload 5th syscall arg */
+ movl %ebp,%r9d /* reload 6th syscall arg */
+ jmp \dispatch
+ .endm
+
+ .macro auditsys_exit exit
+ TRACE_IRQS_ON
+ sti
+ movl %eax,%esi /* second arg, syscall return value */
+ cmpl $0,%eax /* is it < 0? */
+ setl %al /* 1 if so, 0 if not */
+ movzbl %al,%edi /* zero-extend that into %edi */
+ inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
+ call audit_syscall_exit
+ GET_THREAD_INFO(%r10)
+ movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall return value */
+ movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
+ cli
+ TRACE_IRQS_OFF
+ testl %edi,threadinfo_flags(%r10)
+ jnz int_with_check
+ jmp \exit
+ .endm
+
+sysenter_auditsys:
CFI_RESTORE_STATE
+ auditsys_entry sysenter_dispatch
+
+sysexit_audit:
+ auditsys_exit sysexit_from_sys_call
+
+sysenter_tracesys:
xchgl %r9d,%ebp
+ testl $(_TIF_SYSCALL_TRACE|_TIF_SECCOMP),threadinfo_flags(%r10)
+ jz sysenter_auditsys
SAVE_REST
CLEAR_RREGS
movq %r9,R9(%rsp)
@@ -238,13 +294,17 @@ cstar_do_call:
cmpl $IA32_NR_syscalls-1,%eax
ja ia32_badsys
IA32_ARG_FIXUP 1
+cstar_dispatch:
call *ia32_sys_call_table(,%rax,8)
movq %rax,RAX-ARGOFFSET(%rsp)
GET_THREAD_INFO(%r10)
cli
TRACE_IRQS_OFF
- testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
- jnz int_ret_from_sys_call
+ testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),threadinfo_flags(%r10)
+ jnz int_ret_from_sys_call
+ bt $TIF_SYSCALL_AUDIT,threadinfo_flags(%r10)
+ jc sysretl_audit
+sysretl_from_sys_call:
andl $~TS_COMPAT,threadinfo_status(%r10)
RESTORE_ARGS 1,-ARG_SKIP,1,1,1
movl RIP-ARGOFFSET(%rsp),%ecx
@@ -257,9 +317,17 @@ cstar_do_call:
swapgs
sysretl
-cstar_tracesys:
+cstar_auditsys:
CFI_RESTORE_STATE
+ auditsys_entry cstar_dispatch
+
+sysretl_audit:
+ auditsys_exit sysretl_from_sys_call
+
+cstar_tracesys:
xchgl %r9d,%ebp
+ testl $(_TIF_SYSCALL_TRACE|_TIF_SECCOMP),threadinfo_flags(%r10)
+ jz cstar_auditsys
SAVE_REST
CLEAR_RREGS
movq %r9,R9(%rsp)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 9b152d5..01c3e6b 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -375,6 +375,7 @@ tracesys:
* Has correct top of stack, but partial stack frame.
*/
.globl int_ret_from_sys_call
+ .globl int_with_check
int_ret_from_sys_call:
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
This adds fast paths for 32-bit syscall entry and exit when
TIF_SYSCALL_AUDIT is set, but no other kind of syscall tracing.
These paths do not need to save and restore all registers as
the general case of tracing does. Avoiding the iret return path
when syscall audit is enabled helps performance a lot.
Signed-off-by: Roland McGrath <[email protected]>
---
arch/x86/kernel/entry_32.S | 48 ++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 46 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index c778e4f..daf1361 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -53,6 +53,11 @@
#include <asm/processor-flags.h>
#include "irq_vectors.h"
+/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
+#include <linux/elf-em.h>
+#define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE)
+#define __AUDIT_ARCH_LE 0x40000000
+
/*
* We use macros for low-level operations which need to be overridden
* for paravirtualization. The following will never clobber any registers:
@@ -332,7 +337,8 @@ sysenter_past_esp:
/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
- jnz syscall_trace_entry
+ jnz sysenter_audit
+sysenter_do_call:
cmpl $(nr_syscalls), %eax
jae syscall_badsys
call *sys_call_table(,%eax,4)
@@ -342,7 +348,8 @@ sysenter_past_esp:
TRACE_IRQS_OFF
movl TI_flags(%ebp), %ecx
testw $_TIF_ALLWORK_MASK, %cx
- jne syscall_exit_work
+ jne sysexit_audit
+sysenter_exit:
/* if something modifies registers it must also disable sysexit */
movl PT_EIP(%esp), %edx
movl PT_OLDESP(%esp), %ecx
@@ -350,6 +357,43 @@ sysenter_past_esp:
TRACE_IRQS_ON
1: mov PT_FS(%esp), %fs
ENABLE_INTERRUPTS_SYSCALL_RET
+
+sysenter_audit:
+ testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP),TI_flags(%ebp)
+ jnz syscall_trace_entry
+ addl $4,%esp
+ CFI_ADJUST_CFA_OFFSET -4
+ /* %esi already in 8(%esp) 6th arg: 4th syscall arg */
+ /* %edx already in 4(%esp) 5th arg: 3rd syscall arg */
+ /* %ecx already in 0(%esp) 4th arg: 2nd syscall arg */
+ movl %ebx,%ecx /* 3rd arg: 1st syscall arg */
+ movl %eax,%edx /* 2nd arg: syscall number */
+ movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */
+ call audit_syscall_entry
+ pushl %ebx
+ CFI_ADJUST_CFA_OFFSET 4
+ movl PT_EAX(%esp),%eax /* reload syscall number */
+ jmp sysenter_do_call
+
+sysexit_audit:
+ testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx
+ jne syscall_exit_work
+ TRACE_IRQS_ON
+ ENABLE_INTERRUPTS(CLBR_ANY)
+ movl %eax,%edx /* second arg, syscall return value */
+ cmpl $0,%eax /* is it < 0? */
+ setl %al /* 1 if so, 0 if not */
+ movzbl %al,%eax /* zero-extend that */
+ inc %eax /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
+ call audit_syscall_exit
+ DISABLE_INTERRUPTS(CLBR_ANY)
+ TRACE_IRQS_OFF
+ movl TI_flags(%ebp), %ecx
+ testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx
+ jne syscall_exit_work
+ movl PT_EAX(%esp),%eax /* reload syscall return value */
+ jmp sysenter_exit
+
CFI_ENDPROC
.pushsection .fixup,"ax"
2: movl $0,PT_FS(%esp)
On Sun, 6 Jul 2008, Roland McGrath wrote:
>
> This adds fast paths for 32-bit syscall entry and exit when
> TIF_SYSCALL_AUDIT is set, but no other kind of syscall tracing.
> These paths does not need to save and restore all registers as
> the general case of tracing does. Avoiding the iret return path
> when syscall audit is enabled helps performance a lot.
Please don't do it like this.
I realize that Fedora enables audit by default, and that you care about
the speed of the audit path, but I care about the speed of the *non*audit
path. And you just slowed it down unnecessarily.
> TRACE_IRQS_OFF
> - testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
> + testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),threadinfo_flags(%r10)
> jnz int_ret_from_sys_call
> + bt $TIF_SYSCALL_AUDIT,threadinfo_flags(%r10)
> + jc sysexit_audit
Please don't add more instructions to the fastpath. Especially not
relatively slow ones like 'bt'.
So keep the
testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
jnz audit_or_other_work
and then in that *slow*path in int_ret_from_sys_call, do
audit_or_other_work:
testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),threadinfo_flags(%r10)
jnz int_ret_from_sys_call
sysexit_audit:
....
so that we don't have any extra costs at all for the non-audit case.
I realize that maybe you don't care too much about the ia32 emulation, but
it's going to be the common case for some distros (I think several distros
end up with a 32-bit user space even when they use a 64-bit kernel).
Linus
On Mon, 7 Jul 2008, Linus Torvalds wrote:
>
> Please don't add more instructions to the fastpath. Especially not
> relatively slow ones like 'bt'.
Hmm. If I'm reading it right, you seem to have done this correctly in 4/4
for the native x86-32 patch. So it looks like you were just a bit lazy in
3/4?
Or am I missing something?
Linus
I don't know if there was some reason I did it that way rather than the
other. I don't remember in which order I wrote and rewrote the patches.
I'll cop to laziness if you like.
It reminded me that I'd never tested the AMD (syscall/sysret) version of
this path. That was clearly just plain lazy, and it was in fact broken.
It's a good thing you called me names and made me feel bad about myself,
so I cast about for more of my failings.
This version of the patch changes what you didn't like, and it works
right on both the AMD (syscall) and Intel (sysenter) paths. (Yes, I
had already tested all the other paths. This is the only one that
is not used on Intel hardware.)
Is bt slower than testl? Or do you just mean having another load?
(I used bt there because I saw it used in entry_64.S for all cases
of testing for only one bit at a time. I don't claim to really know
about these things, but I had figured a second load from the same
location just loaded (even the identical address calculation) would
be very fast.)
Thanks,
Roland
---
[PATCH 3/4] x86_64 ia32 syscall audit fast-path
This adds fast paths for 32-bit syscall entry and exit when
TIF_SYSCALL_AUDIT is set, but no other kind of syscall tracing.
These paths do not need to save and restore all registers as
the general case of tracing does. Avoiding the iret return path
when syscall audit is enabled helps performance a lot.
Signed-off-by: Roland McGrath <[email protected]>
---
arch/x86/ia32/ia32entry.S | 78 +++++++++++++++++++++++++++++++++++++++++--
arch/x86/kernel/entry_64.S | 1 +
2 files changed, 75 insertions(+), 4 deletions(-)
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index b5e329d..b2b8cda 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -15,6 +15,11 @@
#include <asm/irqflags.h>
#include <linux/linkage.h>
+/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
+#include <linux/elf-em.h>
+#define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE)
+#define __AUDIT_ARCH_LE 0x40000000
+
#define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
.macro IA32_ARG_FIXUP noebp=0
@@ -131,13 +136,15 @@ sysenter_do_call:
cmpl $(IA32_NR_syscalls-1),%eax
ja ia32_badsys
IA32_ARG_FIXUP 1
+sysenter_dispatch:
call *ia32_sys_call_table(,%rax,8)
movq %rax,RAX-ARGOFFSET(%rsp)
GET_THREAD_INFO(%r10)
cli
TRACE_IRQS_OFF
testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
- jnz int_ret_from_sys_call
+ jnz sysexit_audit
+sysexit_from_sys_call:
andl $~TS_COMPAT,threadinfo_status(%r10)
/* clear IF, that popfq doesn't enable interrupts early */
andl $~0x200,EFLAGS-R11(%rsp)
@@ -156,9 +163,59 @@ sysenter_do_call:
/* sysexit */
.byte 0xf, 0x35
-sysenter_tracesys:
+ .macro auditsys_entry_common
+ movl %esi,%r9d /* 6th arg: 4th syscall arg */
+ movl %edx,%r8d /* 5th arg: 3rd syscall arg */
+ /* (already in %ecx) 4th arg: 2nd syscall arg */
+ movl %ebx,%edx /* 3rd arg: 1st syscall arg */
+ movl %eax,%esi /* 2nd arg: syscall number */
+ movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */
+ call audit_syscall_entry
+ movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */
+ cmpl $(IA32_NR_syscalls-1),%eax
+ ja ia32_badsys
+ movl %ebx,%edi /* reload 1st syscall arg */
+ movl RCX-ARGOFFSET(%rsp),%esi /* reload 2nd syscall arg */
+ movl RDX-ARGOFFSET(%rsp),%edx /* reload 3rd syscall arg */
+ movl RSI-ARGOFFSET(%rsp),%ecx /* reload 4th syscall arg */
+ movl RDI-ARGOFFSET(%rsp),%r8d /* reload 5th syscall arg */
+ .endm
+
+ .macro auditsys_exit exit
+ testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),threadinfo_flags(%r10)
+ jnz int_ret_from_sys_call
+ TRACE_IRQS_ON
+ sti
+ movl %eax,%esi /* second arg, syscall return value */
+ cmpl $0,%eax /* is it < 0? */
+ setl %al /* 1 if so, 0 if not */
+ movzbl %al,%edi /* zero-extend that into %edi */
+ inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
+ call audit_syscall_exit
+ GET_THREAD_INFO(%r10)
+ movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall return value */
+ movl RBP-ARGOFFSET(%rsp),%ebp /* reload user register value */
+ movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
+ cli
+ TRACE_IRQS_OFF
+ testl %edi,threadinfo_flags(%r10)
+ jnz int_with_check
+ jmp \exit
+ .endm
+
+sysenter_auditsys:
CFI_RESTORE_STATE
+ auditsys_entry_common
+ movl %ebp,%r9d /* reload 6th syscall arg */
+ jmp sysenter_dispatch
+
+sysexit_audit:
+ auditsys_exit sysexit_from_sys_call
+
+sysenter_tracesys:
xchgl %r9d,%ebp
+ testl $(_TIF_SYSCALL_TRACE|_TIF_SECCOMP),threadinfo_flags(%r10)
+ jz sysenter_auditsys
SAVE_REST
CLEAR_RREGS
movq %r9,R9(%rsp)
@@ -238,13 +295,15 @@ cstar_do_call:
cmpl $IA32_NR_syscalls-1,%eax
ja ia32_badsys
IA32_ARG_FIXUP 1
+cstar_dispatch:
call *ia32_sys_call_table(,%rax,8)
movq %rax,RAX-ARGOFFSET(%rsp)
GET_THREAD_INFO(%r10)
cli
TRACE_IRQS_OFF
testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
- jnz int_ret_from_sys_call
+ jnz sysretl_audit
+sysretl_from_sys_call:
andl $~TS_COMPAT,threadinfo_status(%r10)
RESTORE_ARGS 1,-ARG_SKIP,1,1,1
movl RIP-ARGOFFSET(%rsp),%ecx
@@ -257,8 +316,19 @@ cstar_do_call:
swapgs
sysretl
-cstar_tracesys:
+cstar_auditsys:
CFI_RESTORE_STATE
+ movl %r9d,R9-ARGOFFSET(%rsp) /* register to be clobbered by call */
+ auditsys_entry_common
+ movl R9-ARGOFFSET(%rsp),%r9d /* reload 6th syscall arg */
+ jmp cstar_dispatch
+
+sysretl_audit:
+ auditsys_exit sysretl_from_sys_call
+
+cstar_tracesys:
+ testl $(_TIF_SYSCALL_TRACE|_TIF_SECCOMP),threadinfo_flags(%r10)
+ jz cstar_auditsys
xchgl %r9d,%ebp
SAVE_REST
CLEAR_RREGS
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 9b152d5..01c3e6b 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -375,6 +375,7 @@ tracesys:
* Has correct top of stack, but partial stack frame.
*/
.globl int_ret_from_sys_call
+ .globl int_with_check
int_ret_from_sys_call:
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
On Mon, 7 Jul 2008, Roland McGrath wrote:
>
> Is bt slower than testl?
On many microarchitectures, yes. Especially for a memory operand.
That said, depending on the size of the constant, 'bt' may be _smaller_
than testl (8-bit constant vs 32-bit one). Which can make up for it.
> (I used bt there because I saw it used in entry_64.S for all cases
> of testing for only one bit at a time.
I haven't checked recent CPU's, it may not matter much on ones that
support 64-bit. But bt with a memop was traditionally quite a bit more
expensive than 'test'.
I too am too lazy to check. Once it's in the slow-path, it doesn't much
matter. We're talking a few cycles here.
Linus
* Roland McGrath <[email protected]> wrote:
> I don't know if there was some reason I did it that way rather than
> the other. I don't remember in which order I wrote and rewrote the
> patches. I'll cop to laziness if you like.
>
> It reminded me that I'd never tested the AMD (syscall/sysret) version
> of this path. That was clearly just plain lazy, and it was in fact
> broken. It's a good thing you called me names and made me feel bad
> about myself, so I cast about for more of my failings.
>
> This version of the patch changes what you didn't like, and it works
> right on both the AMD (syscall) and Intel (sysenter) paths. (Yes, I
> had already tested all the other paths. This is the only one that is
> not used on Intel hardware.)
do you have a delta patch against tip/x86/audit-speedup by any chance?
That is a topic branch of your previous drop, which got tested as well
to a certain degree. Would make it easier to see what changed, would
make the merge have a more nuanced history, etc.
or, alternatively, if you have a -git based branch that i could pull,
that would be nice as well. (i can compare old-x86/audit-speedup to
new-x86/audit-speedup.)
Ingo
> do you have a delta patch against tip/x86/audit-speedup by any chance?
> That is a topic branch of your previous drop, which got tested as well
> to a certain degree. Would make it easier to see what changed, would
> make the merge have a more nuanced history, etc.
>
> or, alternatively, if you have a -git based branch that i could pull,
> that would be nice as well. (i can compare old-x86/audit-speedup to
> new-x86/audit-speedup.)
Here is a GIT branch with freshly-rebased patches from today's upstream.
There was some merging fixup to be done after the x86/step changes went in.
I don't think it's useful to have a history merged upstream that includes
the original broken 3/4 patch. The newer version both has the straight
line paths Linus wanted, and works right on AMD. If we had the old version
in GIT, then there would be a point in the history that builds a kernel
that's broken for all 32-bit processes on 64-bit AMD hardware. Anyone
doing a bisect in the future could get unlucky and hit that along the way, etc.
Anyway, you'd have to redo this merging work with x86/step just to get one
that builds now.
The following changes since commit 93ded9b8fd42abe2c3607097963d8de6ad9117eb:
Linus Torvalds (1):
Merge git://git.kernel.org/.../gregkh/usb-2.6
are available in the git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/frob/linux-2.6-roland.git x86/auditsc
Roland McGrath (4):
x86_64: remove bogus optimization in sysret_signal
x86_64 syscall audit fast-path
x86_64 ia32 syscall audit fast-path
i386 syscall audit fast-path
arch/x86/ia32/ia32entry.S | 78 +++++++++++++++++++++++++++++++++++++++++--
arch/x86/kernel/entry_32.S | 48 ++++++++++++++++++++++++++-
arch/x86/kernel/entry_64.S | 49 +++++++++++++++++++++++++---
kernel/auditsc.c | 3 +-
4 files changed, 166 insertions(+), 12 deletions(-)
Thanks,
Roland
On Mon, 21 Jul 2008, Roland McGrath wrote:
>
> Here is a GIT branch with freshly-rebased patches from today's upstream.
> There was some merging fixup to be done after the x86/step changes went in.
Ok, so I decided to try this, because my new toy (Nehalem) showed some
absolutely horrid performance with the Fedora kernel that went away when I
compiled my own sane kernel.
And I suspect it's due to the audit path and auditd being enabled (for no
good reason that I can tell, except to slow everything down, since nobody
sane cares) by default.
HOWEVER. When I pull from your tree, I can no longer compile with the only
sane default (namely "CONFIG_AUDIT is stupid, nobody should use it"),
because I get
arch/x86/kernel/built-in.o: In function `auditsys':
signal_64.c:(.text+0x1f40): undefined reference to `audit_syscall_entry'
arch/x86/kernel/built-in.o: In function `sysret_audit':
signal_64.c:(.text+0x1f85): undefined reference to `audit_syscall_exit'
arch/x86/ia32/built-in.o: In function `sysenter_auditsys':
(.text+0xdd): undefined reference to `audit_syscall_entry'
arch/x86/ia32/built-in.o: In function `sysexit_audit':
(.text+0x128): undefined reference to `audit_syscall_exit'
arch/x86/ia32/built-in.o: In function `cstar_auditsys':
(.text+0x303): undefined reference to `audit_syscall_entry'
arch/x86/ia32/built-in.o: In function `sysretl_audit':
(.text+0x350): undefined reference to `audit_syscall_exit'
make: *** [.tmp_vmlinux1] Error 1
so while I would like to merge this, I really can't.
I'll test the performance anyway (with CONFIG_AUDIT enabled, of course).
Linus
On Wed, 23 Jul 2008, Linus Torvalds wrote:
>
> I'll test the performance anyway (with CONFIG_AUDIT enabled, of course).
Does indeed look like a big improvement, but as mentioned, no way I can
pull it in this form.
Linus
Oops! Duh. I replaced the same GIT branch with a rebased set that uses
#ifdef CONFIG_AUDITSYSCALL. No changes to the code you tested, only that
it compiles away to exactly no difference when audit is compiled out.
The following changes since commit 20b7997e8abdf338dcc27fb4f1333c4973a7f113:
Linus Torvalds (1):
Merge branch 'for-linus' of git://git.kernel.org/.../drzeus/mmc
are available in the git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/frob/linux-2.6-roland.git x86/auditsc
Roland McGrath (4):
x86_64: remove bogus optimization in sysret_signal
x86_64 syscall audit fast-path
x86_64 ia32 syscall audit fast-path
i386 syscall audit fast-path
arch/x86/ia32/ia32entry.S | 91 ++++++++++++++++++++++++++++++++++++++++++--
arch/x86/kernel/entry_32.S | 55 +++++++++++++++++++++++++-
arch/x86/kernel/entry_64.S | 55 ++++++++++++++++++++++++--
kernel/auditsc.c | 3 +-
4 files changed, 192 insertions(+), 12 deletions(-)
Thanks,
Roland