From: YiFei Zhu <[email protected]>
Seccomp cache emulator needs to know all the architecture numbers
that syscall_get_arch() could return for the kernel build in order
to generate a cache for all of them.
The array is declared in header as static __maybe_unused const
to maximize compiler optimiation opportunities such as loop
unrolling.
Signed-off-by: YiFei Zhu <[email protected]>
---
arch/alpha/include/asm/syscall.h | 4 ++++
arch/arc/include/asm/syscall.h | 24 +++++++++++++++++++-----
arch/arm/include/asm/syscall.h | 4 ++++
arch/arm64/include/asm/syscall.h | 4 ++++
arch/c6x/include/asm/syscall.h | 13 +++++++++++--
arch/csky/include/asm/syscall.h | 4 ++++
arch/h8300/include/asm/syscall.h | 4 ++++
arch/hexagon/include/asm/syscall.h | 4 ++++
arch/ia64/include/asm/syscall.h | 4 ++++
arch/m68k/include/asm/syscall.h | 4 ++++
arch/microblaze/include/asm/syscall.h | 4 ++++
arch/mips/include/asm/syscall.h | 16 ++++++++++++++++
arch/nds32/include/asm/syscall.h | 13 +++++++++++--
arch/nios2/include/asm/syscall.h | 4 ++++
arch/openrisc/include/asm/syscall.h | 4 ++++
arch/parisc/include/asm/syscall.h | 7 +++++++
arch/powerpc/include/asm/syscall.h | 14 ++++++++++++++
arch/riscv/include/asm/syscall.h | 14 ++++++++++----
arch/s390/include/asm/syscall.h | 7 +++++++
arch/sh/include/asm/syscall_32.h | 17 +++++++++++------
arch/sparc/include/asm/syscall.h | 9 +++++++++
arch/x86/include/asm/syscall.h | 11 +++++++++++
arch/x86/um/asm/syscall.h | 14 ++++++++++----
arch/xtensa/include/asm/syscall.h | 4 ++++
24 files changed, 184 insertions(+), 23 deletions(-)
diff --git a/arch/alpha/include/asm/syscall.h b/arch/alpha/include/asm/syscall.h
index 11c688c1d7ec..625ac9b23f37 100644
--- a/arch/alpha/include/asm/syscall.h
+++ b/arch/alpha/include/asm/syscall.h
@@ -4,6 +4,10 @@
#include <uapi/linux/audit.h>
+static __maybe_unused const int syscall_arches[] = {
+ AUDIT_ARCH_ALPHA
+};
+
static inline int syscall_get_arch(struct task_struct *task)
{
return AUDIT_ARCH_ALPHA;
diff --git a/arch/arc/include/asm/syscall.h b/arch/arc/include/asm/syscall.h
index 94529e89dff0..899c13cbf5cc 100644
--- a/arch/arc/include/asm/syscall.h
+++ b/arch/arc/include/asm/syscall.h
@@ -65,14 +65,28 @@ syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
}
}
+#ifdef CONFIG_ISA_ARCOMPACT
+# ifdef CONFIG_CPU_BIG_ENDIAN
+# define SYSCALL_ARCH AUDIT_ARCH_ARCOMPACTBE
+# else
+# define SYSCALL_ARCH AUDIT_ARCH_ARCOMPACT
+# endif /* CONFIG_CPU_BIG_ENDIAN */
+#else
+# ifdef CONFIG_CPU_BIG_ENDIAN
+# define SYSCALL_ARCH AUDIT_ARCH_ARCV2BE
+# else
+# define SYSCALL_ARCH AUDIT_ARCH_ARCV2
+# endif /* CONFIG_CPU_BIG_ENDIAN */
+#endif /* CONFIG_ISA_ARCOMPACT */
+
+static __maybe_unused const int syscall_arches[] = {
+ SYSCALL_ARCH
+};
+
static inline int
syscall_get_arch(struct task_struct *task)
{
- return IS_ENABLED(CONFIG_ISA_ARCOMPACT)
- ? (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)
- ? AUDIT_ARCH_ARCOMPACTBE : AUDIT_ARCH_ARCOMPACT)
- : (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)
- ? AUDIT_ARCH_ARCV2BE : AUDIT_ARCH_ARCV2);
+ return SYSCALL_ARCH;
}
#endif
diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h
index fd02761ba06c..33ade26e3956 100644
--- a/arch/arm/include/asm/syscall.h
+++ b/arch/arm/include/asm/syscall.h
@@ -73,6 +73,10 @@ static inline void syscall_set_arguments(struct task_struct *task,
memcpy(®s->ARM_r0 + 1, args, 5 * sizeof(args[0]));
}
+static __maybe_unused const int syscall_arches[] = {
+ AUDIT_ARCH_ARM
+};
+
static inline int syscall_get_arch(struct task_struct *task)
{
/* ARM tasks don't change audit architectures on the fly. */
diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h
index cfc0672013f6..77f3d300e7a0 100644
--- a/arch/arm64/include/asm/syscall.h
+++ b/arch/arm64/include/asm/syscall.h
@@ -82,6 +82,10 @@ static inline void syscall_set_arguments(struct task_struct *task,
memcpy(®s->regs[1], args, 5 * sizeof(args[0]));
}
+static __maybe_unused const int syscall_arches[] = {
+ AUDIT_ARCH_ARM, AUDIT_ARCH_AARCH64
+};
+
/*
* We don't care about endianness (__AUDIT_ARCH_LE bit) here because
* AArch64 has the same system calls both on little- and big- endian.
diff --git a/arch/c6x/include/asm/syscall.h b/arch/c6x/include/asm/syscall.h
index 38f3e2284ecd..0d78c67ee1fc 100644
--- a/arch/c6x/include/asm/syscall.h
+++ b/arch/c6x/include/asm/syscall.h
@@ -66,10 +66,19 @@ static inline void syscall_set_arguments(struct task_struct *task,
regs->a9 = *args;
}
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define SYSCALL_ARCH AUDIT_ARCH_C6XBE
+#else
+#define SYSCALL_ARCH AUDIT_ARCH_C6X
+#endif
+
+static __maybe_unused const int syscall_arches[] = {
+ SYSCALL_ARCH
+};
+
static inline int syscall_get_arch(struct task_struct *task)
{
- return IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)
- ? AUDIT_ARCH_C6XBE : AUDIT_ARCH_C6X;
+ return SYSCALL_ARCH;
}
#endif /* __ASM_C6X_SYSCALLS_H */
diff --git a/arch/csky/include/asm/syscall.h b/arch/csky/include/asm/syscall.h
index f624fa3bbc22..86242d2850d7 100644
--- a/arch/csky/include/asm/syscall.h
+++ b/arch/csky/include/asm/syscall.h
@@ -68,6 +68,10 @@ syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
memcpy(®s->a1, args, 5 * sizeof(regs->a1));
}
+static __maybe_unused const int syscall_arches[] = {
+ AUDIT_ARCH_CSKY
+};
+
static inline int
syscall_get_arch(struct task_struct *task)
{
diff --git a/arch/h8300/include/asm/syscall.h b/arch/h8300/include/asm/syscall.h
index 01666b8bb263..775f6ac8fde3 100644
--- a/arch/h8300/include/asm/syscall.h
+++ b/arch/h8300/include/asm/syscall.h
@@ -28,6 +28,10 @@ syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
*args = regs->er6;
}
+static __maybe_unused const int syscall_arches[] = {
+ AUDIT_ARCH_H8300
+};
+
static inline int
syscall_get_arch(struct task_struct *task)
{
diff --git a/arch/hexagon/include/asm/syscall.h b/arch/hexagon/include/asm/syscall.h
index f6e454f18038..6ee21a76f6a3 100644
--- a/arch/hexagon/include/asm/syscall.h
+++ b/arch/hexagon/include/asm/syscall.h
@@ -45,6 +45,10 @@ static inline long syscall_get_return_value(struct task_struct *task,
return regs->r00;
}
+static __maybe_unused const int syscall_arches[] = {
+ AUDIT_ARCH_HEXAGON
+};
+
static inline int syscall_get_arch(struct task_struct *task)
{
return AUDIT_ARCH_HEXAGON;
diff --git a/arch/ia64/include/asm/syscall.h b/arch/ia64/include/asm/syscall.h
index 6c6f16e409a8..19456125c89a 100644
--- a/arch/ia64/include/asm/syscall.h
+++ b/arch/ia64/include/asm/syscall.h
@@ -71,6 +71,10 @@ static inline void syscall_set_arguments(struct task_struct *task,
ia64_syscall_get_set_arguments(task, regs, args, 1);
}
+static __maybe_unused const int syscall_arches[] = {
+ AUDIT_ARCH_IA64
+};
+
static inline int syscall_get_arch(struct task_struct *task)
{
return AUDIT_ARCH_IA64;
diff --git a/arch/m68k/include/asm/syscall.h b/arch/m68k/include/asm/syscall.h
index 465ac039be09..031b051f9026 100644
--- a/arch/m68k/include/asm/syscall.h
+++ b/arch/m68k/include/asm/syscall.h
@@ -4,6 +4,10 @@
#include <uapi/linux/audit.h>
+static __maybe_unused const int syscall_arches[] = {
+ AUDIT_ARCH_M68K
+};
+
static inline int syscall_get_arch(struct task_struct *task)
{
return AUDIT_ARCH_M68K;
diff --git a/arch/microblaze/include/asm/syscall.h b/arch/microblaze/include/asm/syscall.h
index 3a6924f3cbde..28cde14056d1 100644
--- a/arch/microblaze/include/asm/syscall.h
+++ b/arch/microblaze/include/asm/syscall.h
@@ -105,6 +105,10 @@ static inline void syscall_set_arguments(struct task_struct *task,
asmlinkage unsigned long do_syscall_trace_enter(struct pt_regs *regs);
asmlinkage void do_syscall_trace_leave(struct pt_regs *regs);
+static __maybe_unused const int syscall_arches[] = {
+ AUDIT_ARCH_MICROBLAZE
+};
+
static inline int syscall_get_arch(struct task_struct *task)
{
return AUDIT_ARCH_MICROBLAZE;
diff --git a/arch/mips/include/asm/syscall.h b/arch/mips/include/asm/syscall.h
index 25fa651c937d..29e4c1c47c54 100644
--- a/arch/mips/include/asm/syscall.h
+++ b/arch/mips/include/asm/syscall.h
@@ -140,6 +140,22 @@ extern const unsigned long sys_call_table[];
extern const unsigned long sys32_call_table[];
extern const unsigned long sysn32_call_table[];
+static __maybe_unused const int syscall_arches[] = {
+#ifdef __LITTLE_ENDIAN
+ AUDIT_ARCH_MIPSEL,
+# ifdef CONFIG_64BIT
+ AUDIT_ARCH_MIPSEL64,
+ AUDIT_ARCH_MIPSEL64N32,
+# endif /* CONFIG_64BIT */
+#else
+ AUDIT_ARCH_MIPS,
+# ifdef CONFIG_64BIT
+ AUDIT_ARCH_MIPS64,
+ AUDIT_ARCH_MIPS64N32,
+# endif /* CONFIG_64BIT */
+#endif /* __LITTLE_ENDIAN */
+};
+
static inline int syscall_get_arch(struct task_struct *task)
{
int arch = AUDIT_ARCH_MIPS;
diff --git a/arch/nds32/include/asm/syscall.h b/arch/nds32/include/asm/syscall.h
index 7b5180d78e20..2dd5e33bcfcb 100644
--- a/arch/nds32/include/asm/syscall.h
+++ b/arch/nds32/include/asm/syscall.h
@@ -154,11 +154,20 @@ syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
memcpy(®s->uregs[0] + 1, args, 5 * sizeof(args[0]));
}
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define SYSCALL_ARCH AUDIT_ARCH_NDS32BE
+#else
+#define SYSCALL_ARCH AUDIT_ARCH_NDS32
+#endif
+
+static __maybe_unused const int syscall_arches[] = {
+ SYSCALL_ARCH
+};
+
static inline int
syscall_get_arch(struct task_struct *task)
{
- return IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)
- ? AUDIT_ARCH_NDS32BE : AUDIT_ARCH_NDS32;
+ return SYSCALL_ARCH;
}
#endif /* _ASM_NDS32_SYSCALL_H */
diff --git a/arch/nios2/include/asm/syscall.h b/arch/nios2/include/asm/syscall.h
index 526449edd768..8fa2716cac5a 100644
--- a/arch/nios2/include/asm/syscall.h
+++ b/arch/nios2/include/asm/syscall.h
@@ -69,6 +69,10 @@ static inline void syscall_set_arguments(struct task_struct *task,
regs->r9 = *args;
}
+static __maybe_unused const int syscall_arches[] = {
+ AUDIT_ARCH_NIOS2
+};
+
static inline int syscall_get_arch(struct task_struct *task)
{
return AUDIT_ARCH_NIOS2;
diff --git a/arch/openrisc/include/asm/syscall.h b/arch/openrisc/include/asm/syscall.h
index e6383be2a195..4eb28ad08042 100644
--- a/arch/openrisc/include/asm/syscall.h
+++ b/arch/openrisc/include/asm/syscall.h
@@ -64,6 +64,10 @@ syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
memcpy(®s->gpr[3], args, 6 * sizeof(args[0]));
}
+static __maybe_unused const int syscall_arches[] = {
+ AUDIT_ARCH_OPENRISC
+};
+
static inline int syscall_get_arch(struct task_struct *task)
{
return AUDIT_ARCH_OPENRISC;
diff --git a/arch/parisc/include/asm/syscall.h b/arch/parisc/include/asm/syscall.h
index 00b127a5e09b..2915f140c9fd 100644
--- a/arch/parisc/include/asm/syscall.h
+++ b/arch/parisc/include/asm/syscall.h
@@ -55,6 +55,13 @@ static inline void syscall_rollback(struct task_struct *task,
/* do nothing */
}
+static __maybe_unused const int syscall_arches[] = {
+ AUDIT_ARCH_PARISC,
+#ifdef CONFIG_64BIT
+ AUDIT_ARCH_PARISC64,
+#endif
+};
+
static inline int syscall_get_arch(struct task_struct *task)
{
int arch = AUDIT_ARCH_PARISC;
diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index fd1b518eed17..781deb211e3d 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -104,6 +104,20 @@ static inline void syscall_set_arguments(struct task_struct *task,
regs->orig_gpr3 = args[0];
}
+static __maybe_unused const int syscall_arches[] = {
+#ifdef __LITTLE_ENDIAN__
+ AUDIT_ARCH_PPC | __AUDIT_ARCH_LE,
+# ifdef CONFIG_PPC64
+ AUDIT_ARCH_PPC64LE,
+# endif /* CONFIG_PPC64 */
+#else
+ AUDIT_ARCH_PPC,
+# ifdef CONFIG_PPC64
+ AUDIT_ARCH_PPC64,
+# endif /* CONFIG_PPC64 */
+#endif /* __LITTLE_ENDIAN__ */
+};
+
static inline int syscall_get_arch(struct task_struct *task)
{
int arch;
diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h
index 49350c8bd7b0..4b36d358243e 100644
--- a/arch/riscv/include/asm/syscall.h
+++ b/arch/riscv/include/asm/syscall.h
@@ -73,13 +73,19 @@ static inline void syscall_set_arguments(struct task_struct *task,
memcpy(®s->a1, args, 5 * sizeof(regs->a1));
}
-static inline int syscall_get_arch(struct task_struct *task)
-{
#ifdef CONFIG_64BIT
- return AUDIT_ARCH_RISCV64;
+#define SYSCALL_ARCH AUDIT_ARCH_RISCV64
#else
- return AUDIT_ARCH_RISCV32;
+#define SYSCALL_ARCH AUDIT_ARCH_RISCV32
#endif
+
+static __maybe_unused const int syscall_arches[] = {
+ SYSCALL_ARCH
+};
+
+static inline int syscall_get_arch(struct task_struct *task)
+{
+ return SYSCALL_ARCH;
}
#endif /* _ASM_RISCV_SYSCALL_H */
diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h
index d9d5de0f67ff..4cb9da36610a 100644
--- a/arch/s390/include/asm/syscall.h
+++ b/arch/s390/include/asm/syscall.h
@@ -89,6 +89,13 @@ static inline void syscall_set_arguments(struct task_struct *task,
regs->orig_gpr2 = args[0];
}
+static __maybe_unused const int syscall_arches[] = {
+ AUDIT_ARCH_S390X,
+#ifdef CONFIG_COMPAT
+ AUDIT_ARCH_S390,
+#endif
+};
+
static inline int syscall_get_arch(struct task_struct *task)
{
#ifdef CONFIG_COMPAT
diff --git a/arch/sh/include/asm/syscall_32.h b/arch/sh/include/asm/syscall_32.h
index cb51a7528384..4780f2339c72 100644
--- a/arch/sh/include/asm/syscall_32.h
+++ b/arch/sh/include/asm/syscall_32.h
@@ -69,13 +69,18 @@ static inline void syscall_set_arguments(struct task_struct *task,
regs->regs[4] = args[0];
}
-static inline int syscall_get_arch(struct task_struct *task)
-{
- int arch = AUDIT_ARCH_SH;
-
#ifdef CONFIG_CPU_LITTLE_ENDIAN
- arch |= __AUDIT_ARCH_LE;
+#define SYSCALL_ARCH AUDIT_ARCH_SHEL
+#else
+#define SYSCALL_ARCH AUDIT_ARCH_SH
#endif
- return arch;
+
+static __maybe_unused const int syscall_arches[] = {
+ SYSCALL_ARCH
+};
+
+static inline int syscall_get_arch(struct task_struct *task)
+{
+ return SYSCALL_ARCH;
}
#endif /* __ASM_SH_SYSCALL_32_H */
diff --git a/arch/sparc/include/asm/syscall.h b/arch/sparc/include/asm/syscall.h
index 62a5a78804c4..a458992cdcfe 100644
--- a/arch/sparc/include/asm/syscall.h
+++ b/arch/sparc/include/asm/syscall.h
@@ -127,6 +127,15 @@ static inline void syscall_set_arguments(struct task_struct *task,
regs->u_regs[UREG_I0 + i] = args[i];
}
+static __maybe_unused const int syscall_arches[] = {
+#ifdef CONFIG_SPARC64
+ AUDIT_ARCH_SPARC64,
+#endif
+#if !defined(CONFIG_SPARC64) || defined(CONFIG_COMPAT)
+ AUDIT_ARCH_SPARC,
+#endif
+};
+
static inline int syscall_get_arch(struct task_struct *task)
{
#if defined(CONFIG_SPARC64) && defined(CONFIG_COMPAT)
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 7cbf733d11af..e13bb2a65b6f 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -97,6 +97,10 @@ static inline void syscall_set_arguments(struct task_struct *task,
memcpy(®s->bx + i, args, n * sizeof(args[0]));
}
+static __maybe_unused const int syscall_arches[] = {
+ AUDIT_ARCH_I386
+};
+
static inline int syscall_get_arch(struct task_struct *task)
{
return AUDIT_ARCH_I386;
@@ -152,6 +156,13 @@ static inline void syscall_set_arguments(struct task_struct *task,
}
}
+static __maybe_unused const int syscall_arches[] = {
+ AUDIT_ARCH_X86_64,
+#ifdef CONFIG_IA32_EMULATION
+ AUDIT_ARCH_I386,
+#endif
+};
+
static inline int syscall_get_arch(struct task_struct *task)
{
/* x32 tasks should be considered AUDIT_ARCH_X86_64. */
diff --git a/arch/x86/um/asm/syscall.h b/arch/x86/um/asm/syscall.h
index 56a2f0913e3c..590a31e22b99 100644
--- a/arch/x86/um/asm/syscall.h
+++ b/arch/x86/um/asm/syscall.h
@@ -9,13 +9,19 @@ typedef asmlinkage long (*sys_call_ptr_t)(unsigned long, unsigned long,
unsigned long, unsigned long,
unsigned long, unsigned long);
-static inline int syscall_get_arch(struct task_struct *task)
-{
#ifdef CONFIG_X86_32
- return AUDIT_ARCH_I386;
+#define SYSCALL_ARCH AUDIT_ARCH_I386
#else
- return AUDIT_ARCH_X86_64;
+#define SYSCALL_ARCH AUDIT_ARCH_X86_64
#endif
+
+static __maybe_unused const int syscall_arches[] = {
+ SYSCALL_ARCH
+};
+
+static inline int syscall_get_arch(struct task_struct *task)
+{
+ return SYSCALL_ARCH;
}
#endif /* __UM_ASM_SYSCALL_H */
diff --git a/arch/xtensa/include/asm/syscall.h b/arch/xtensa/include/asm/syscall.h
index f9a671cbf933..3d334fb0d329 100644
--- a/arch/xtensa/include/asm/syscall.h
+++ b/arch/xtensa/include/asm/syscall.h
@@ -14,6 +14,10 @@
#include <asm/ptrace.h>
#include <uapi/linux/audit.h>
+static __maybe_unused const int syscall_arches[] = {
+ AUDIT_ARCH_XTENSA
+};
+
static inline int syscall_get_arch(struct task_struct *task)
{
return AUDIT_ARCH_XTENSA;
--
2.28.0
From: YiFei Zhu
> Sent: 24 September 2020 13:44
>
> Seccomp cache emulator needs to know all the architecture numbers
> that syscall_get_arch() could return for the kernel build in order
> to generate a cache for all of them.
>
> The array is declared in header as static __maybe_unused const
> to maximize compiler optimiation opportunities such as loop
> unrolling.
I doubt the compiler will do what you want.
Looking at it, in most cases there are one or two entries.
I think only MIPS has three.
So a static inline function that contains a list of
conditionals will generate better code that any kind of
array lookup.
For x86-64 you end up with something like:
#ifdef CONFIG_IA32_EMULATION
if (sd->arch == AUDIT_ARCH_I386) return xxx;
#endif
return yyy;
Probably saves you having multiple arrays that need to be
kept carefully in step.
David
-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
On Thu, Sep 24, 2020 at 8:47 AM David Laight <[email protected]> wrote:
> I doubt the compiler will do what you want.
> Looking at it, in most cases there are one or two entries.
> I think only MIPS has three.
It does ;) GCC 10.2.0:
$ objdump -d kernel/seccomp.o | less
[...]
0000000000001520 <__seccomp_filter>:
[...]
1587: 41 8b 54 24 04 mov 0x4(%r12),%edx
158c: b9 08 01 00 00 mov $0x108,%ecx
1591: 81 fa 3e 00 00 c0 cmp $0xc000003e,%edx
1597: 75 2e jne 15c7 <__seccomp_filter+0xa7>
[...]
15c7: 81 fa 03 00 00 40 cmp $0x40000003,%edx
15cd: b9 40 01 00 00 mov $0x140,%ecx
15d2: 74 c5 je 1599 <__seccomp_filter+0x79>
15d4: 0f 0b ud2
[...]
0000000000001cb0 <seccomp_cache_prepare>:
[...]
1cc4: 41 b9 3e 00 00 c0 mov $0xc000003e,%r9d
[...]
1dba: 41 b9 03 00 00 40 mov $0x40000003,%r9d
[...]
0000000000002e30 <proc_pid_seccomp_cache>:
[...]
2e72: ba 3e 00 00 c0 mov $0xc000003e,%edx
[...]
2eb5: ba 03 00 00 40 mov $0x40000003,%edx
Granted, I have CC_OPTIMIZE_FOR_PERFORMANCE rather than
CC_OPTIMIZE_FOR_SIZE, but this patch itself is trying to sacrifice
some of the memory for speed.
YiFei Zhu
From: YiFei Zhu
> Sent: 24 September 2020 15:17
>
> On Thu, Sep 24, 2020 at 8:47 AM David Laight <[email protected]> wrote:
> > I doubt the compiler will do what you want.
> > Looking at it, in most cases there are one or two entries.
> > I think only MIPS has three.
>
> It does ;) GCC 10.2.0:
>
> $ objdump -d kernel/seccomp.o | less
> [...]
> 0000000000001520 <__seccomp_filter>:
> [...]
> 1587: 41 8b 54 24 04 mov 0x4(%r12),%edx
> 158c: b9 08 01 00 00 mov $0x108,%ecx
> 1591: 81 fa 3e 00 00 c0 cmp $0xc000003e,%edx
> 1597: 75 2e jne 15c7 <__seccomp_filter+0xa7>
> [...]
> 15c7: 81 fa 03 00 00 40 cmp $0x40000003,%edx
> 15cd: b9 40 01 00 00 mov $0x140,%ecx
> 15d2: 74 c5 je 1599 <__seccomp_filter+0x79>
> 15d4: 0f 0b ud2
> [...]
> 0000000000001cb0 <seccomp_cache_prepare>:
> [...]
> 1cc4: 41 b9 3e 00 00 c0 mov $0xc000003e,%r9d
> [...]
> 1dba: 41 b9 03 00 00 40 mov $0x40000003,%r9d
> [...]
> 0000000000002e30 <proc_pid_seccomp_cache>:
> [...]
> 2e72: ba 3e 00 00 c0 mov $0xc000003e,%edx
> [...]
> 2eb5: ba 03 00 00 40 mov $0x40000003,%edx
>
> Granted, I have CC_OPTIMIZE_FOR_PERFORMANCE rather than
> CC_OPTIMIZE_FOR_SIZE, but this patch itself is trying to sacrifice
> some of the memory for speed.
Don't both CC_OPTIMIZE_FOR_PERFORMANCE (-??) and CC_OPTIMIZE_FOR_SIZE (-s)
generate terrible code?
Try with a slghtly older gcc.
I think that entire optimisation (discarding const arrays)
is very recent.
David
-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
On Thu, Sep 24, 2020 at 9:20 AM David Laight <[email protected]> wrote:
> > Granted, I have CC_OPTIMIZE_FOR_PERFORMANCE rather than
> > CC_OPTIMIZE_FOR_SIZE, but this patch itself is trying to sacrifice
> > some of the memory for speed.
>
> Don't both CC_OPTIMIZE_FOR_PERFORMANCE (-??) and CC_OPTIMIZE_FOR_SIZE (-s)
> generate terrible code?
You have to choose one for "Compiler optimization level" in "General Setup", no?
The former is -O2 and the latter is -Os.
> Try with a slghtly older gcc.
> I think that entire optimisation (discarding const arrays)
> is very recent.
Will try, will take a while to get an old GCC to run, however :/
YiFei Zhu
On Thu, Sep 24, 2020 at 9:37 AM YiFei Zhu <[email protected]> wrote:
> > Try with a slghtly older gcc.
> > I think that entire optimisation (discarding const arrays)
> > is very recent.
>
> Will try, will take a while to get an old GCC to run, however :/
Possibly one of the oldest I can easily get to work is GCC 6.5.0, and
unrolling seems is still the case:
0000000000001560 <__seccomp_filter>:
[...]
15d4: 41 8b 74 24 04 mov 0x4(%r12),%esi
15d9: bf 08 01 00 00 mov $0x108,%edi
15de: 81 fe 3e 00 00 c0 cmp $0xc000003e,%esi
15e4: 75 30 jne 1616 <__seccomp_filter+0xb6>
[...]
1616: 81 fe 03 00 00 40 cmp $0x40000003,%esi
161c: bf 40 01 00 00 mov $0x140,%edi
1621: 74 c3 je 15e6 <__seccomp_filter+0x86>
1623: 0f 0b ud2
Am I overlooking something or should I go further back in the compiler version?
YiFei Zhu
[resend, argh, I didn't reply-all, sorry for the noise]
On Thu, Sep 24, 2020 at 07:44:17AM -0500, YiFei Zhu wrote:
> From: YiFei Zhu <[email protected]>
>
> Seccomp cache emulator needs to know all the architecture numbers
> that syscall_get_arch() could return for the kernel build in order
> to generate a cache for all of them.
>
> The array is declared in header as static __maybe_unused const
> to maximize compiler optimiation opportunities such as loop
> unrolling.
Disregarding the "how" of this, yeah, we'll certainly need something to
tell seccomp about the arrangement of syscall tables and how to find
them.
However, I'd still prefer to do this on a per-arch basis, and include
more detail, as I've got in my v1.
Something missing from both styles, though, is a consolidation of
values, where the AUDIT_ARCH* isn't reused in both the seccomp info and
the syscall_get_arch() return. The problems here were two-fold:
1) putting this in syscall.h meant you do not have full NR_syscall*
visibility on some architectures (e.g. arm64 plays weird games with
header include order).
2) seccomp needs to handle "multiplexed" tables like x86_x32 (distros
haven't removed CONFIG_X86_X32 widely yet, so it is a reality that
it must be dealt with), which means seccomp's idea of the arch
"number" can't be the same as the AUDIT_ARCH.
So, likely a combo of approaches is needed: an array (or more likely,
enum), declared in the per-arch seccomp.h file. And I don't see a way
to solve #1 cleanly.
Regardless, it needs to be split per architecture so that regressions
can be bisected/reverted/isolated cleanly. And if we can't actually test
it at runtime (or find someone who can) it's not a good idea to make the
change. :)
> [...]
> diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
> index 7cbf733d11af..e13bb2a65b6f 100644
> --- a/arch/x86/include/asm/syscall.h
> +++ b/arch/x86/include/asm/syscall.h
> @@ -97,6 +97,10 @@ static inline void syscall_set_arguments(struct task_struct *task,
> memcpy(®s->bx + i, args, n * sizeof(args[0]));
> }
>
> +static __maybe_unused const int syscall_arches[] = {
> + AUDIT_ARCH_I386
> +};
> +
> static inline int syscall_get_arch(struct task_struct *task)
> {
> return AUDIT_ARCH_I386;
> @@ -152,6 +156,13 @@ static inline void syscall_set_arguments(struct task_struct *task,
> }
> }
>
> +static __maybe_unused const int syscall_arches[] = {
> + AUDIT_ARCH_X86_64,
> +#ifdef CONFIG_IA32_EMULATION
> + AUDIT_ARCH_I386,
> +#endif
> +};
I'm leaving this section quoted because I'll refer to it in a later
patch review...
--
Kees Cook
On Fri, Sep 25, 2020 at 2:01 AM Kees Cook <[email protected]> wrote:
> 2) seccomp needs to handle "multiplexed" tables like x86_x32 (distros
> haven't removed CONFIG_X86_X32 widely yet, so it is a reality that
> it must be dealt with), which means seccomp's idea of the arch
> "number" can't be the same as the AUDIT_ARCH.
Sure, distros ship it; but basically nobody uses it, it doesn't have
to be fast. As long as we don't *break* it, everything's fine. And if
we ignore the existence of X32 in the fastpath, that'll just mean that
syscalls with the X32 marker bit always hit the seccomp slowpath
(because it'll look like the syscall number is out-of-bounds ) - no
problem.
On Fri, Sep 25, 2020 at 02:15:50AM +0200, Jann Horn wrote:
> On Fri, Sep 25, 2020 at 2:01 AM Kees Cook <[email protected]> wrote:
> > 2) seccomp needs to handle "multiplexed" tables like x86_x32 (distros
> > haven't removed CONFIG_X86_X32 widely yet, so it is a reality that
> > it must be dealt with), which means seccomp's idea of the arch
> > "number" can't be the same as the AUDIT_ARCH.
>
> Sure, distros ship it; but basically nobody uses it, it doesn't have
> to be fast. As long as we don't *break* it, everything's fine. And if
> we ignore the existence of X32 in the fastpath, that'll just mean that
> syscalls with the X32 marker bit always hit the seccomp slowpath
> (because it'll look like the syscall number is out-of-bounds ) - no
> problem.
You do realize that X32 is amd64 counterpart of mips n32, right? And that's
not "basically nobody uses it"...
On Fri, Sep 25, 2020 at 2:18 AM Al Viro <[email protected]> wrote:
> On Fri, Sep 25, 2020 at 02:15:50AM +0200, Jann Horn wrote:
> > On Fri, Sep 25, 2020 at 2:01 AM Kees Cook <[email protected]> wrote:
> > > 2) seccomp needs to handle "multiplexed" tables like x86_x32 (distros
> > > haven't removed CONFIG_X86_X32 widely yet, so it is a reality that
> > > it must be dealt with), which means seccomp's idea of the arch
> > > "number" can't be the same as the AUDIT_ARCH.
> >
> > Sure, distros ship it; but basically nobody uses it, it doesn't have
> > to be fast. As long as we don't *break* it, everything's fine. And if
> > we ignore the existence of X32 in the fastpath, that'll just mean that
> > syscalls with the X32 marker bit always hit the seccomp slowpath
> > (because it'll look like the syscall number is out-of-bounds ) - no
> > problem.
>
> You do realize that X32 is amd64 counterpart of mips n32, right? And that's
> not "basically nobody uses it"...
What makes X32 weird for seccomp is that it has the syscall tables for
X86-64 and X32 mushed together, using the single architecture
identifier AUDIT_ARCH_X86_64. I believe that's what Kees referred to
by "multiplexed tables".
As far as I can tell, MIPS is more well-behaved there and uses the
separate architecture identifiers
AUDIT_ARCH_MIPS|__AUDIT_ARCH_64BIT
and
AUDIT_ARCH_MIPS|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_CONVENTION_MIPS64_N32.
(But no, I did not actually realize that that's what N32 is. Thanks
for the explanation, I was wondering why MIPS was the only
architecture with three architecture identifiers...)
[resending this too]
On Thu, Sep 24, 2020 at 6:01 PM Kees Cook <[email protected]> wrote:
> Disregarding the "how" of this, yeah, we'll certainly need something to
> tell seccomp about the arrangement of syscall tables and how to find
> them.
>
> However, I'd still prefer to do this on a per-arch basis, and include
> more detail, as I've got in my v1.
>
> Something missing from both styles, though, is a consolidation of
> values, where the AUDIT_ARCH* isn't reused in both the seccomp info and
> the syscall_get_arch() return. The problems here were two-fold:
>
> 1) putting this in syscall.h meant you do not have full NR_syscall*
> visibility on some architectures (e.g. arm64 plays weird games with
> header include order).
I don't get this one -- I'm not playing with NR_syscall here.
> 2) seccomp needs to handle "multiplexed" tables like x86_x32 (distros
> haven't removed CONFIG_X86_X32 widely yet, so it is a reality that
> it must be dealt with), which means seccomp's idea of the arch
> "number" can't be the same as the AUDIT_ARCH.
Why so? Does anyone actually use x32 in a container? The memory cost
and analysis cost is on everyone. The worst case scenario if we don't
support it is that the syscall is not accelerated.
> So, likely a combo of approaches is needed: an array (or more likely,
> enum), declared in the per-arch seccomp.h file. And I don't see a way
> to solve #1 cleanly.
>
> Regardless, it needs to be split per architecture so that regressions
> can be bisected/reverted/isolated cleanly. And if we can't actually test
> it at runtime (or find someone who can) it's not a good idea to make the
> change. :)
You have a good point regarding tests. Don't see how it affects
regressions though. Only one file here is ever included per-build.
> > [...]
> > diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
> > index 7cbf733d11af..e13bb2a65b6f 100644
> > --- a/arch/x86/include/asm/syscall.h
> > +++ b/arch/x86/include/asm/syscall.h
> > @@ -97,6 +97,10 @@ static inline void syscall_set_arguments(struct task_struct *task,
> > memcpy(®s->bx + i, args, n * sizeof(args[0]));
> > }
> >
> > +static __maybe_unused const int syscall_arches[] = {
> > + AUDIT_ARCH_I386
> > +};
> > +
> > static inline int syscall_get_arch(struct task_struct *task)
> > {
> > return AUDIT_ARCH_I386;
> > @@ -152,6 +156,13 @@ static inline void syscall_set_arguments(struct task_struct *task,
> > }
> > }
> >
> > +static __maybe_unused const int syscall_arches[] = {
> > + AUDIT_ARCH_X86_64,
> > +#ifdef CONFIG_IA32_EMULATION
> > + AUDIT_ARCH_I386,
> > +#endif
> > +};
>
> I'm leaving this section quoted because I'll refer to it in a later
> patch review...
>
> --
> Kees Cook
On Thu, Sep 24, 2020 at 08:27:40PM -0500, YiFei Zhu wrote:
> [resending this too]
>
> On Thu, Sep 24, 2020 at 6:01 PM Kees Cook <[email protected]> wrote:
> > Disregarding the "how" of this, yeah, we'll certainly need something to
> > tell seccomp about the arrangement of syscall tables and how to find
> > them.
> >
> > However, I'd still prefer to do this on a per-arch basis, and include
> > more detail, as I've got in my v1.
> >
> > Something missing from both styles, though, is a consolidation of
> > values, where the AUDIT_ARCH* isn't reused in both the seccomp info and
> > the syscall_get_arch() return. The problems here were two-fold:
> >
> > 1) putting this in syscall.h meant you do not have full NR_syscall*
> > visibility on some architectures (e.g. arm64 plays weird games with
> > header include order).
>
> I don't get this one -- I'm not playing with NR_syscall here.
Right, sorry, I may not have been clear. When building my RFC I noticed
that I couldn't use NR_syscall very "early" in the header file include
stack on arm64, which complicated things. So I guess what I mean is
something like "it's probably better to do all these seccomp-specific
macros/etc in asm/include/seccomp.h rather than in syscall.h because I
know at least one architecture that might cause trouble."
> > 2) seccomp needs to handle "multiplexed" tables like x86_x32 (distros
> > haven't removed CONFIG_X86_X32 widely yet, so it is a reality that
> > it must be dealt with), which means seccomp's idea of the arch
> > "number" can't be the same as the AUDIT_ARCH.
>
> Why so? Does anyone actually use x32 in a container? The memory cost
> and analysis cost is on everyone. The worst case scenario if we don't
> support it is that the syscall is not accelerated.
Ironicailly, that's the only place I actually know for sure where people
using x32 because it shows measurable (10%) speed-up for builders:
https://lore.kernel.org/lkml/CAOesGMgu1i3p7XMZuCEtj63T-ST_jh+BfaHy-K6LhgqNriKHAA@mail.gmail.com
So, yes, as you and Jann both point out, it wouldn't be terrible to just
ignore x32, it seems a shame to penalize it. That said, if the masking
step from my v1 is actually noticable on a native workload, then yeah,
probably x32 should be ignored. My instinct (not measured) is that it's
faster than walking a small array.[citation needed]
> > So, likely a combo of approaches is needed: an array (or more likely,
> > enum), declared in the per-arch seccomp.h file. And I don't see a way
> > to solve #1 cleanly.
> >
> > Regardless, it needs to be split per architecture so that regressions
> > can be bisected/reverted/isolated cleanly. And if we can't actually test
> > it at runtime (or find someone who can) it's not a good idea to make the
> > change. :)
>
> You have a good point regarding tests. Don't see how it affects
> regressions though. Only one file here is ever included per-build.
It's easier to do a per-arch revert (i.e. all the -stable tree
machinery, etc) with a single SHA instead of having to write a partial
revert, etc.
--
Kees Cook
On Thu, Sep 24, 2020 at 10:09 PM Kees Cook <[email protected]> wrote:
> Right, sorry, I may not have been clear. When building my RFC I noticed
> that I couldn't use NR_syscall very "early" in the header file include
> stack on arm64, which complicated things. So I guess what I mean is
> something like "it's probably better to do all these seccomp-specific
> macros/etc in asm/include/seccomp.h rather than in syscall.h because I
> know at least one architecture that might cause trouble."
Ah. Makes sense.
> Ironicailly, that's the only place I actually know for sure where people
> using x32 because it shows measurable (10%) speed-up for builders:
> https://lore.kernel.org/lkml/CAOesGMgu1i3p7XMZuCEtj63T-ST_jh+BfaHy-K6LhgqNriKHAA@mail.gmail.com
Wow. 10% is significant. Makes you wonder why x32 hasn't conquered the world.
> So, yes, as you and Jann both point out, it wouldn't be terrible to just
> ignore x32, it seems a shame to penalize it. That said, if the masking
> step from my v1 is actually noticable on a native workload, then yeah,
> probably x32 should be ignored. My instinct (not measured) is that it's
> faster than walking a small array.[citation needed]
My instinct: should be pretty similar, with the loop unrolled.
You convince me that penalizing supporting x32 would be a pity :( The
10% is so nice I want it.
> It's easier to do a per-arch revert (i.e. all the -stable tree
> machinery, etc) with a single SHA instead of having to write a partial
> revert, etc.
I see. Thanks for clarifying.
How about this? Rather than specifically designing names for bitmasks
(native, compat, multiplex), just have SECCOMP_ARCH_{1,2,3}? Each arch
number would provide the size of the bitmap and a static inline
function to check the given seccomp_data belongs to the arch and if
so, the order of the bit in the bitmap. There is no need for the
shifts and madness in seccomp.c; it's arch-dependent code in their own
seccomp.h. We let the preprocessor and compiler to make things
optimized.
YiFei Zhu
On Thu, Sep 24, 2020 at 10:28 PM YiFei Zhu <[email protected]> wrote:
> Ah. Makes sense.
>
> > Ironicailly, that's the only place I actually know for sure where people
> > using x32 because it shows measurable (10%) speed-up for builders:
> > https://lore.kernel.org/lkml/CAOesGMgu1i3p7XMZuCEtj63T-ST_jh+BfaHy-K6LhgqNriKHAA@mail.gmail.com
>
> Wow. 10% is significant. Makes you wonder why x32 hasn't conquered the world.
>
> > So, yes, as you and Jann both point out, it wouldn't be terrible to just
> > ignore x32, it seems a shame to penalize it. That said, if the masking
> > step from my v1 is actually noticable on a native workload, then yeah,
> > probably x32 should be ignored. My instinct (not measured) is that it's
> > faster than walking a small array.[citation needed]
>
> You convince me that penalizing supporting x32 would be a pity :( The
> 10% is so nice I want it.
I'm rethinking this -- the majority of our users will not use x32. I
don't think it's that useful for the majority to run all the
simulations and have the memory footprint if only a small minority
will use it.
I also just checked Debian, and it has boot-time disabling of the x32
arch downstream [1]:
CONFIG_X86_X32=y
CONFIG_X86_X32_DISABLED=y
Which means we will still generate all the code for x32 in seccomp
even though people probably won't be using it...
I also talked to some of my peers and they had a point regarding how
x32 limiting address space to 4GiB is very harsh on many modern
language runtimes, so even though it provides a 10% speed boost, its
adoption is hard -- one has to compile all the C libraries in x32 in
addition to x86_64, since one would have programs needing > 4GiB
address space needing x86_64 version of the libraries.
[1] https://wiki.debian.org/X32Port
YiFei Zhu