2018-12-06 15:05:57

by David Abdurachmanov

[permalink] [raw]
Subject: [PATCH 0/2] riscv: add support for SECCOMP and SECCOMP_FILTER

This was originally tested on 4.19 kernel with Fedora 29/RISCV.
Depends on audit patches (already in linux-next).

The patches are on top of linux-next next-20181206 tag.

Validation was done using libseccomp (at
1e64feb5f1a9ea02687228e3073e8b784a04ce46, which is master at this
date). See PR: https://github.com/seccomp/libseccomp/pull/134

Test results:

# ./regression -T live

Regression Test Summary
tests run: 8
tests skipped: 0
tests passed: 8
tests failed: 0
tests errored: 0

# ./regression

Regression Test Summary
tests run: 5129
tests skipped: 104
tests passed: 5129
tests failed: 0
tests errored: 0

David Abdurachmanov (2):
riscv: add support for SECCOMP incl. filters
riscv: fix syscall_{get,set}_arguments

arch/riscv/Kconfig | 14 ++++++++++
arch/riscv/include/asm/syscall.h | 42 +++++++++++++++++++++-------
arch/riscv/include/asm/thread_info.h | 5 +++-
arch/riscv/kernel/entry.S | 27 ++++++++++++++++--
arch/riscv/kernel/ptrace.c | 8 ++++++
5 files changed, 83 insertions(+), 13 deletions(-)

--
2.19.2



2018-12-06 15:04:22

by David Abdurachmanov

[permalink] [raw]
Subject: [PATCH 1/2] riscv: add support for SECCOMP incl. filters

The patch adds support for SECCOMP and SECCOMP_FILTER (BPF).

Signed-off-by: David Abdurachmanov <[email protected]>
---
arch/riscv/Kconfig | 14 ++++++++++++++
arch/riscv/include/asm/thread_info.h | 5 ++++-
arch/riscv/kernel/entry.S | 27 +++++++++++++++++++++++++--
arch/riscv/kernel/ptrace.c | 8 ++++++++
4 files changed, 51 insertions(+), 3 deletions(-)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index a4f48f757204..49cd8e251547 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -29,6 +29,7 @@ config RISCV
select GENERIC_SMP_IDLE_THREAD
select GENERIC_ATOMIC64 if !64BIT || !RISCV_ISA_A
select HAVE_ARCH_AUDITSYSCALL
+ select HAVE_ARCH_SECCOMP_FILTER
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_DMA_CONTIGUOUS
select HAVE_FUTEX_CMPXCHG if FUTEX
@@ -228,6 +229,19 @@ menu "Kernel features"

source "kernel/Kconfig.hz"

+config SECCOMP
+ bool "Enable seccomp to safely compute untrusted bytecode"
+ help
+ This kernel feature is useful for number crunching applications
+ that may need to compute untrusted bytecode during their
+ execution. By using pipes or other transports made available to
+ the process as file descriptors supporting the read/write
+ syscalls, it's possible to isolate those applications in
+ their own address space using seccomp. Once seccomp is
+ enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
+ and the task is only allowed to execute a few safe syscalls
+ defined by each seccomp mode.
+
endmenu

menu "Boot options"
diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
index 1c9cc8389928..1fd6e4130cab 100644
--- a/arch/riscv/include/asm/thread_info.h
+++ b/arch/riscv/include/asm/thread_info.h
@@ -81,6 +81,7 @@ struct thread_info {
#define TIF_MEMDIE 5 /* is terminating due to OOM killer */
#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing */
+#define TIF_SECCOMP 8 /* syscall secure computing */

#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
@@ -88,11 +89,13 @@ struct thread_info {
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
+#define _TIF_SECCOMP (1 << TIF_SECCOMP)

#define _TIF_WORK_MASK \
(_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED)

#define _TIF_SYSCALL_WORK \
- (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT)
+ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT \
+ _TIF_SECCOMP )

#endif /* _ASM_RISCV_THREAD_INFO_H */
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 355166f57205..e88ccbfa61ee 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -207,8 +207,25 @@ check_syscall_nr:
/* Check to make sure we don't jump to a bogus syscall number. */
li t0, __NR_syscalls
la s0, sys_ni_syscall
- /* Syscall number held in a7 */
- bgeu a7, t0, 1f
+ /*
+ * The tracer can change syscall number to valid/invalid value.
+ * We use syscall_set_nr helper in syscall_trace_enter thus we
+ * cannot trust the current value in a7 and have to reload from
+ * the current task pt_regs.
+ */
+ REG_L a7, PT_A7(sp)
+ /*
+ * Syscall number held in a7.
+ * If syscall number is above allowed value, redirect to ni_syscall.
+ */
+ bge a7, t0, 1f
+ /*
+ * Check if syscall is rejected by tracer or seccomp, i.e., a7 == -1.
+ * If yes, we pretend it was executed.
+ */
+ li t1, -1
+ beq a7, t1, ret_from_syscall_rejected
+ /* Call syscall */
la s0, sys_call_table
slli t0, a7, RISCV_LGPTR
add s0, s0, t0
@@ -219,6 +236,12 @@ check_syscall_nr:
ret_from_syscall:
/* Set user a0 to kernel a0 */
REG_S a0, PT_A0(sp)
+ /*
+ * We didn't execute the actual syscall.
+ * Seccomp already set return value for the current task pt_regs.
+ * (If it was configured with SECCOMP_RET_ERRNO/TRACE)
+ */
+ret_from_syscall_rejected:
/* Trace syscalls, but only if requested by the user. */
REG_L t0, TASK_TI_FLAGS(tp)
andi t0, t0, _TIF_SYSCALL_WORK
diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c
index c1b51539c3e2..598e48b8ca2b 100644
--- a/arch/riscv/kernel/ptrace.c
+++ b/arch/riscv/kernel/ptrace.c
@@ -160,6 +160,14 @@ void do_syscall_trace_enter(struct pt_regs *regs)
if (tracehook_report_syscall_entry(regs))
syscall_set_nr(current, regs, -1);

+ /*
+ * Do the secure computing after ptrace; failures should be fast.
+ * If this fails we might have return value in a0 from seccomp
+ * (via SECCOMP_RET_ERRNO/TRACE).
+ */
+ if (secure_computing(NULL) == -1)
+ syscall_set_nr(current, regs, -1);
+
#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
trace_sys_enter(regs, syscall_get_nr(current, regs));
--
2.19.2


2018-12-06 15:05:24

by David Abdurachmanov

[permalink] [raw]
Subject: [PATCH 2/2] riscv: fix syscall_{get,set}_arguments

Testing with libseccomp master branch revealed that testcases with
filters on syscall arguments were failing due to wrong values. Seccomp
uses syscall_get_argumentsi() to copy syscall arguments, and there is a
bug in pointer arithmetics in memcpy() call.

Two alternative implementation were tested: the one in this patch and
another one based on while-break loop. Both delivered the same results.

This implementation is also used in arm, arm64 and nds32 arches.

Signed-off-by: David Abdurachmanov <[email protected]>
---
arch/riscv/include/asm/syscall.h | 42 ++++++++++++++++++++++++--------
1 file changed, 32 insertions(+), 10 deletions(-)

diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h
index bba3da6ef157..26ceb434a433 100644
--- a/arch/riscv/include/asm/syscall.h
+++ b/arch/riscv/include/asm/syscall.h
@@ -70,19 +70,32 @@ static inline void syscall_set_return_value(struct task_struct *task,
regs->a0 = (long) error ?: val;
}

+#define SYSCALL_MAX_ARGS 6
+
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
unsigned int i, unsigned int n,
unsigned long *args)
{
- BUG_ON(i + n > 6);
+ if (n == 0)
+ return;
+
+ if (i + n > SYSCALL_MAX_ARGS) {
+ unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i;
+ unsigned int n_bad = n + i - SYSCALL_MAX_ARGS;
+ pr_warning("%s called with max args %d, handling only %d\n",
+ __func__, i + n, SYSCALL_MAX_ARGS);
+ memset(args_bad, 0, n_bad * sizeof(args[0]));
+ }
+
if (i == 0) {
args[0] = regs->orig_a0;
args++;
i++;
n--;
}
- memcpy(args, &regs->a1 + i * sizeof(regs->a1), n * sizeof(args[0]));
+
+ memcpy(args, &regs->a0 + i, n * sizeof(args[0]));
}

static inline void syscall_set_arguments(struct task_struct *task,
@@ -90,14 +103,23 @@ static inline void syscall_set_arguments(struct task_struct *task,
unsigned int i, unsigned int n,
const unsigned long *args)
{
- BUG_ON(i + n > 6);
- if (i == 0) {
- regs->orig_a0 = args[0];
- args++;
- i++;
- n--;
- }
- memcpy(&regs->a1 + i * sizeof(regs->a1), args, n * sizeof(regs->a0));
+ if (n == 0)
+ return;
+
+ if (i + n > SYSCALL_MAX_ARGS) {
+ pr_warning("%s called with max args %d, handling only %d\n",
+ __func__, i + n, SYSCALL_MAX_ARGS);
+ n = SYSCALL_MAX_ARGS - i;
+ }
+
+ if (i == 0) {
+ regs->orig_a0 = args[0];
+ args++;
+ i++;
+ n--;
+ }
+
+ memcpy(&regs->a0 + i, args, n * sizeof(args[0]));
}

static inline int syscall_get_arch(void)
--
2.19.2


2018-12-06 16:49:30

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH 1/2] riscv: add support for SECCOMP incl. filters

On Thu, Dec 6, 2018 at 7:02 AM David Abdurachmanov
<[email protected]> wrote:
>
> The patch adds support for SECCOMP and SECCOMP_FILTER (BPF).
>
> Signed-off-by: David Abdurachmanov <[email protected]>
> ---
> arch/riscv/Kconfig | 14 ++++++++++++++
> arch/riscv/include/asm/thread_info.h | 5 ++++-
> arch/riscv/kernel/entry.S | 27 +++++++++++++++++++++++++--
> arch/riscv/kernel/ptrace.c | 8 ++++++++
> 4 files changed, 51 insertions(+), 3 deletions(-)
>
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index a4f48f757204..49cd8e251547 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -29,6 +29,7 @@ config RISCV
> select GENERIC_SMP_IDLE_THREAD
> select GENERIC_ATOMIC64 if !64BIT || !RISCV_ISA_A
> select HAVE_ARCH_AUDITSYSCALL
> + select HAVE_ARCH_SECCOMP_FILTER
> select HAVE_MEMBLOCK_NODE_MAP
> select HAVE_DMA_CONTIGUOUS
> select HAVE_FUTEX_CMPXCHG if FUTEX
> @@ -228,6 +229,19 @@ menu "Kernel features"
>
> source "kernel/Kconfig.hz"
>
> +config SECCOMP
> + bool "Enable seccomp to safely compute untrusted bytecode"
> + help
> + This kernel feature is useful for number crunching applications
> + that may need to compute untrusted bytecode during their
> + execution. By using pipes or other transports made available to
> + the process as file descriptors supporting the read/write
> + syscalls, it's possible to isolate those applications in
> + their own address space using seccomp. Once seccomp is
> + enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
> + and the task is only allowed to execute a few safe syscalls
> + defined by each seccomp mode.
> +
> endmenu
>
> menu "Boot options"
> diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
> index 1c9cc8389928..1fd6e4130cab 100644
> --- a/arch/riscv/include/asm/thread_info.h
> +++ b/arch/riscv/include/asm/thread_info.h
> @@ -81,6 +81,7 @@ struct thread_info {
> #define TIF_MEMDIE 5 /* is terminating due to OOM killer */
> #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */
> #define TIF_SYSCALL_AUDIT 7 /* syscall auditing */
> +#define TIF_SECCOMP 8 /* syscall secure computing */
>
> #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
> #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
> @@ -88,11 +89,13 @@ struct thread_info {
> #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
> #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
> #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
> +#define _TIF_SECCOMP (1 << TIF_SECCOMP)
>
> #define _TIF_WORK_MASK \
> (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED)
>
> #define _TIF_SYSCALL_WORK \
> - (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT)
> + (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT \
> + _TIF_SECCOMP )
>
> #endif /* _ASM_RISCV_THREAD_INFO_H */
> diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
> index 355166f57205..e88ccbfa61ee 100644
> --- a/arch/riscv/kernel/entry.S
> +++ b/arch/riscv/kernel/entry.S
> @@ -207,8 +207,25 @@ check_syscall_nr:
> /* Check to make sure we don't jump to a bogus syscall number. */
> li t0, __NR_syscalls
> la s0, sys_ni_syscall
> - /* Syscall number held in a7 */
> - bgeu a7, t0, 1f
> + /*
> + * The tracer can change syscall number to valid/invalid value.
> + * We use syscall_set_nr helper in syscall_trace_enter thus we
> + * cannot trust the current value in a7 and have to reload from
> + * the current task pt_regs.
> + */
> + REG_L a7, PT_A7(sp)
> + /*
> + * Syscall number held in a7.
> + * If syscall number is above allowed value, redirect to ni_syscall.
> + */
> + bge a7, t0, 1f
> + /*
> + * Check if syscall is rejected by tracer or seccomp, i.e., a7 == -1.
> + * If yes, we pretend it was executed.
> + */
> + li t1, -1
> + beq a7, t1, ret_from_syscall_rejected
> + /* Call syscall */
> la s0, sys_call_table
> slli t0, a7, RISCV_LGPTR
> add s0, s0, t0
> @@ -219,6 +236,12 @@ check_syscall_nr:
> ret_from_syscall:
> /* Set user a0 to kernel a0 */
> REG_S a0, PT_A0(sp)
> + /*
> + * We didn't execute the actual syscall.
> + * Seccomp already set return value for the current task pt_regs.
> + * (If it was configured with SECCOMP_RET_ERRNO/TRACE)
> + */
> +ret_from_syscall_rejected:
> /* Trace syscalls, but only if requested by the user. */
> REG_L t0, TASK_TI_FLAGS(tp)
> andi t0, t0, _TIF_SYSCALL_WORK
> diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c
> index c1b51539c3e2..598e48b8ca2b 100644
> --- a/arch/riscv/kernel/ptrace.c
> +++ b/arch/riscv/kernel/ptrace.c
> @@ -160,6 +160,14 @@ void do_syscall_trace_enter(struct pt_regs *regs)
> if (tracehook_report_syscall_entry(regs))
> syscall_set_nr(current, regs, -1);
>
> + /*
> + * Do the secure computing after ptrace; failures should be fast.
> + * If this fails we might have return value in a0 from seccomp
> + * (via SECCOMP_RET_ERRNO/TRACE).
> + */
> + if (secure_computing(NULL) == -1)
> + syscall_set_nr(current, regs, -1);

On a -1 return, this should return immediately -- it should not
continue to process trace_sys_enter(), etc.

-Kees

> +
> #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
> if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
> trace_sys_enter(regs, syscall_get_nr(current, regs));
> --
> 2.19.2
>


--
Kees Cook

2018-12-06 16:50:52

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH 2/2] riscv: fix syscall_{get,set}_arguments

On Thu, Dec 6, 2018 at 7:02 AM David Abdurachmanov
<[email protected]> wrote:
>
> Testing with libseccomp master branch revealed that testcases with
> filters on syscall arguments were failing due to wrong values. Seccomp
> uses syscall_get_argumentsi() to copy syscall arguments, and there is a
> bug in pointer arithmetics in memcpy() call.
>
> Two alternative implementation were tested: the one in this patch and
> another one based on while-break loop. Both delivered the same results.
>
> This implementation is also used in arm, arm64 and nds32 arches.

Minor nit: can you make this the first patch? That way seccomp works
correctly from the point of introduction. :)

-Kees

>
> Signed-off-by: David Abdurachmanov <[email protected]>
> ---
> arch/riscv/include/asm/syscall.h | 42 ++++++++++++++++++++++++--------
> 1 file changed, 32 insertions(+), 10 deletions(-)
>
> diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h
> index bba3da6ef157..26ceb434a433 100644
> --- a/arch/riscv/include/asm/syscall.h
> +++ b/arch/riscv/include/asm/syscall.h
> @@ -70,19 +70,32 @@ static inline void syscall_set_return_value(struct task_struct *task,
> regs->a0 = (long) error ?: val;
> }
>
> +#define SYSCALL_MAX_ARGS 6
> +
> static inline void syscall_get_arguments(struct task_struct *task,
> struct pt_regs *regs,
> unsigned int i, unsigned int n,
> unsigned long *args)
> {
> - BUG_ON(i + n > 6);
> + if (n == 0)
> + return;
> +
> + if (i + n > SYSCALL_MAX_ARGS) {
> + unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i;
> + unsigned int n_bad = n + i - SYSCALL_MAX_ARGS;
> + pr_warning("%s called with max args %d, handling only %d\n",
> + __func__, i + n, SYSCALL_MAX_ARGS);
> + memset(args_bad, 0, n_bad * sizeof(args[0]));
> + }
> +
> if (i == 0) {
> args[0] = regs->orig_a0;
> args++;
> i++;
> n--;
> }
> - memcpy(args, &regs->a1 + i * sizeof(regs->a1), n * sizeof(args[0]));
> +
> + memcpy(args, &regs->a0 + i, n * sizeof(args[0]));
> }
>
> static inline void syscall_set_arguments(struct task_struct *task,
> @@ -90,14 +103,23 @@ static inline void syscall_set_arguments(struct task_struct *task,
> unsigned int i, unsigned int n,
> const unsigned long *args)
> {
> - BUG_ON(i + n > 6);
> - if (i == 0) {
> - regs->orig_a0 = args[0];
> - args++;
> - i++;
> - n--;
> - }
> - memcpy(&regs->a1 + i * sizeof(regs->a1), args, n * sizeof(regs->a0));
> + if (n == 0)
> + return;
> +
> + if (i + n > SYSCALL_MAX_ARGS) {
> + pr_warning("%s called with max args %d, handling only %d\n",
> + __func__, i + n, SYSCALL_MAX_ARGS);
> + n = SYSCALL_MAX_ARGS - i;
> + }
> +
> + if (i == 0) {
> + regs->orig_a0 = args[0];
> + args++;
> + i++;
> + n--;
> + }
> +
> + memcpy(&regs->a0 + i, args, n * sizeof(args[0]));
> }
>
> static inline int syscall_get_arch(void)
> --
> 2.19.2
>


--
Kees Cook

2018-12-06 16:53:40

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH 1/2] riscv: add support for SECCOMP incl. filters

On Thu, Dec 6, 2018 at 7:02 AM David Abdurachmanov
<[email protected]> wrote:
> The patch adds support for SECCOMP and SECCOMP_FILTER (BPF).

Can you add support to tools/testing/selftests/seccomp/seccomp_bpf.c
as well? That selftest finds a lot of weird corner-cases...

> diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
> index 1c9cc8389928..1fd6e4130cab 100644
> --- a/arch/riscv/include/asm/thread_info.h
> +++ b/arch/riscv/include/asm/thread_info.h
> @@ -81,6 +81,7 @@ struct thread_info {
> #define TIF_MEMDIE 5 /* is terminating due to OOM killer */
> #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */
> #define TIF_SYSCALL_AUDIT 7 /* syscall auditing */
> +#define TIF_SECCOMP 8 /* syscall secure computing */

Nit: extra tab needs to be removed.

--
Kees Cook

2018-12-06 17:09:07

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH 1/2] riscv: add support for SECCOMP incl. filters

On Thu, Dec 6, 2018 at 7:02 AM David Abdurachmanov
<[email protected]> wrote:
> The patch adds support for SECCOMP and SECCOMP_FILTER (BPF).

I built this against linux-next but it's missing seccomp.h. Was that
accidentally left out of the commit?


CC arch/riscv/kernel/asm-offsets.s
In file included from ./include/linux/sched.h:21:0,
from arch/riscv/kernel/asm-offsets.c:18:
./include/linux/seccomp.h:14:10: fatal error: asm/seccomp.h: No such
file or directory
#include <asm/seccomp.h>
^~~~~~~~~~~~~~~

--
Kees Cook

2018-12-06 17:12:30

by David Abdurachmanov

[permalink] [raw]
Subject: Re: [PATCH 1/2] riscv: add support for SECCOMP incl. filters

On Thu, Dec 6, 2018 at 5:47 PM Kees Cook <[email protected]> wrote:
>
> On Thu, Dec 6, 2018 at 7:02 AM David Abdurachmanov
> <[email protected]> wrote:
> >
> > The patch adds support for SECCOMP and SECCOMP_FILTER (BPF).
> >
> > Signed-off-by: David Abdurachmanov <[email protected]>
> > ---
> > arch/riscv/Kconfig | 14 ++++++++++++++
> > arch/riscv/include/asm/thread_info.h | 5 ++++-
> > arch/riscv/kernel/entry.S | 27 +++++++++++++++++++++++++--
> > arch/riscv/kernel/ptrace.c | 8 ++++++++
> > 4 files changed, 51 insertions(+), 3 deletions(-)
> >
> > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > index a4f48f757204..49cd8e251547 100644
> > --- a/arch/riscv/Kconfig
> > +++ b/arch/riscv/Kconfig
> > @@ -29,6 +29,7 @@ config RISCV
> > select GENERIC_SMP_IDLE_THREAD
> > select GENERIC_ATOMIC64 if !64BIT || !RISCV_ISA_A
> > select HAVE_ARCH_AUDITSYSCALL
> > + select HAVE_ARCH_SECCOMP_FILTER
> > select HAVE_MEMBLOCK_NODE_MAP
> > select HAVE_DMA_CONTIGUOUS
> > select HAVE_FUTEX_CMPXCHG if FUTEX
> > @@ -228,6 +229,19 @@ menu "Kernel features"
> >
> > source "kernel/Kconfig.hz"
> >
> > +config SECCOMP
> > + bool "Enable seccomp to safely compute untrusted bytecode"
> > + help
> > + This kernel feature is useful for number crunching applications
> > + that may need to compute untrusted bytecode during their
> > + execution. By using pipes or other transports made available to
> > + the process as file descriptors supporting the read/write
> > + syscalls, it's possible to isolate those applications in
> > + their own address space using seccomp. Once seccomp is
> > + enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
> > + and the task is only allowed to execute a few safe syscalls
> > + defined by each seccomp mode.
> > +
> > endmenu
> >
> > menu "Boot options"
> > diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
> > index 1c9cc8389928..1fd6e4130cab 100644
> > --- a/arch/riscv/include/asm/thread_info.h
> > +++ b/arch/riscv/include/asm/thread_info.h
> > @@ -81,6 +81,7 @@ struct thread_info {
> > #define TIF_MEMDIE 5 /* is terminating due to OOM killer */
> > #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */
> > #define TIF_SYSCALL_AUDIT 7 /* syscall auditing */
> > +#define TIF_SECCOMP 8 /* syscall secure computing */
> >
> > #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
> > #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
> > @@ -88,11 +89,13 @@ struct thread_info {
> > #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
> > #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
> > #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
> > +#define _TIF_SECCOMP (1 << TIF_SECCOMP)
> >
> > #define _TIF_WORK_MASK \
> > (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED)
> >
> > #define _TIF_SYSCALL_WORK \
> > - (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT)
> > + (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT \
> > + _TIF_SECCOMP )
> >
> > #endif /* _ASM_RISCV_THREAD_INFO_H */
> > diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
> > index 355166f57205..e88ccbfa61ee 100644
> > --- a/arch/riscv/kernel/entry.S
> > +++ b/arch/riscv/kernel/entry.S
> > @@ -207,8 +207,25 @@ check_syscall_nr:
> > /* Check to make sure we don't jump to a bogus syscall number. */
> > li t0, __NR_syscalls
> > la s0, sys_ni_syscall
> > - /* Syscall number held in a7 */
> > - bgeu a7, t0, 1f
> > + /*
> > + * The tracer can change syscall number to valid/invalid value.
> > + * We use syscall_set_nr helper in syscall_trace_enter thus we
> > + * cannot trust the current value in a7 and have to reload from
> > + * the current task pt_regs.
> > + */
> > + REG_L a7, PT_A7(sp)
> > + /*
> > + * Syscall number held in a7.
> > + * If syscall number is above allowed value, redirect to ni_syscall.
> > + */
> > + bge a7, t0, 1f
> > + /*
> > + * Check if syscall is rejected by tracer or seccomp, i.e., a7 == -1.
> > + * If yes, we pretend it was executed.
> > + */
> > + li t1, -1
> > + beq a7, t1, ret_from_syscall_rejected
> > + /* Call syscall */
> > la s0, sys_call_table
> > slli t0, a7, RISCV_LGPTR
> > add s0, s0, t0
> > @@ -219,6 +236,12 @@ check_syscall_nr:
> > ret_from_syscall:
> > /* Set user a0 to kernel a0 */
> > REG_S a0, PT_A0(sp)
> > + /*
> > + * We didn't execute the actual syscall.
> > + * Seccomp already set return value for the current task pt_regs.
> > + * (If it was configured with SECCOMP_RET_ERRNO/TRACE)
> > + */
> > +ret_from_syscall_rejected:
> > /* Trace syscalls, but only if requested by the user. */
> > REG_L t0, TASK_TI_FLAGS(tp)
> > andi t0, t0, _TIF_SYSCALL_WORK
> > diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c
> > index c1b51539c3e2..598e48b8ca2b 100644
> > --- a/arch/riscv/kernel/ptrace.c
> > +++ b/arch/riscv/kernel/ptrace.c
> > @@ -160,6 +160,14 @@ void do_syscall_trace_enter(struct pt_regs *regs)
> > if (tracehook_report_syscall_entry(regs))
> > syscall_set_nr(current, regs, -1);
> >
> > + /*
> > + * Do the secure computing after ptrace; failures should be fast.
> > + * If this fails we might have return value in a0 from seccomp
> > + * (via SECCOMP_RET_ERRNO/TRACE).
> > + */
> > + if (secure_computing(NULL) == -1)
> > + syscall_set_nr(current, regs, -1);
>
> On a -1 return, this should return immediately -- it should not
> continue to process trace_sys_enter(), etc.

Ops! No idea how I missed that.
Will fix it.

> -Kees
>
> > +
> > #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
> > if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
> > trace_sys_enter(regs, syscall_get_nr(current, regs));
> > --
> > 2.19.2
> >
>
>
> --
> Kees Cook

2018-12-06 17:14:13

by David Abdurachmanov

[permalink] [raw]
Subject: Re: [PATCH 1/2] riscv: add support for SECCOMP incl. filters

On Thu, Dec 6, 2018 at 5:52 PM Kees Cook <[email protected]> wrote:
>
> On Thu, Dec 6, 2018 at 7:02 AM David Abdurachmanov
> <[email protected]> wrote:
> > The patch adds support for SECCOMP and SECCOMP_FILTER (BPF).
>
> Can you add support to tools/testing/selftests/seccomp/seccomp_bpf.c
> as well? That selftest finds a lot of weird corner-cases...
>
> > diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
> > index 1c9cc8389928..1fd6e4130cab 100644
> > --- a/arch/riscv/include/asm/thread_info.h
> > +++ b/arch/riscv/include/asm/thread_info.h
> > @@ -81,6 +81,7 @@ struct thread_info {
> > #define TIF_MEMDIE 5 /* is terminating due to OOM killer */
> > #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */
> > #define TIF_SYSCALL_AUDIT 7 /* syscall auditing */
> > +#define TIF_SECCOMP 8 /* syscall secure computing */
>
> Nit: extra tab needs to be removed.

Will fix it.
I need to cleanup my vim configuration for tab with.

david

2018-12-06 17:14:43

by David Abdurachmanov

[permalink] [raw]
Subject: Re: [PATCH 1/2] riscv: add support for SECCOMP incl. filters

On Thu, Dec 6, 2018 at 6:07 PM Kees Cook <[email protected]> wrote:
>
> On Thu, Dec 6, 2018 at 7:02 AM David Abdurachmanov
> <[email protected]> wrote:
> > The patch adds support for SECCOMP and SECCOMP_FILTER (BPF).
>
> I built this against linux-next but it's missing seccomp.h. Was that
> accidentally left out of the commit?
>
>
> CC arch/riscv/kernel/asm-offsets.s
> In file included from ./include/linux/sched.h:21:0,
> from arch/riscv/kernel/asm-offsets.c:18:
> ./include/linux/seccomp.h:14:10: fatal error: asm/seccomp.h: No such
> file or directory
> #include <asm/seccomp.h>
> ^~~~~~~~~~~~~~~
>

I forgot to add it...
Will fix it.

david

2018-12-06 17:16:25

by David Abdurachmanov

[permalink] [raw]
Subject: Re: [PATCH 2/2] riscv: fix syscall_{get,set}_arguments

On Thu, Dec 6, 2018 at 5:49 PM Kees Cook <[email protected]> wrote:
>
> On Thu, Dec 6, 2018 at 7:02 AM David Abdurachmanov
> <[email protected]> wrote:
> >
> > Testing with libseccomp master branch revealed that testcases with
> > filters on syscall arguments were failing due to wrong values. Seccomp
> > uses syscall_get_argumentsi() to copy syscall arguments, and there is a
> > bug in pointer arithmetics in memcpy() call.
> >
> > Two alternative implementation were tested: the one in this patch and
> > another one based on while-break loop. Both delivered the same results.
> >
> > This implementation is also used in arm, arm64 and nds32 arches.
>
> Minor nit: can you make this the first patch? That way seccomp works
> correctly from the point of introduction. :)

Ok. I will do it.

david

>
> -Kees
>
> >
> > Signed-off-by: David Abdurachmanov <[email protected]>
> > ---
> > arch/riscv/include/asm/syscall.h | 42 ++++++++++++++++++++++++--------
> > 1 file changed, 32 insertions(+), 10 deletions(-)
> >
> > diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h
> > index bba3da6ef157..26ceb434a433 100644
> > --- a/arch/riscv/include/asm/syscall.h
> > +++ b/arch/riscv/include/asm/syscall.h
> > @@ -70,19 +70,32 @@ static inline void syscall_set_return_value(struct task_struct *task,
> > regs->a0 = (long) error ?: val;
> > }
> >
> > +#define SYSCALL_MAX_ARGS 6
> > +
> > static inline void syscall_get_arguments(struct task_struct *task,
> > struct pt_regs *regs,
> > unsigned int i, unsigned int n,
> > unsigned long *args)
> > {
> > - BUG_ON(i + n > 6);
> > + if (n == 0)
> > + return;
> > +
> > + if (i + n > SYSCALL_MAX_ARGS) {
> > + unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i;
> > + unsigned int n_bad = n + i - SYSCALL_MAX_ARGS;
> > + pr_warning("%s called with max args %d, handling only %d\n",
> > + __func__, i + n, SYSCALL_MAX_ARGS);
> > + memset(args_bad, 0, n_bad * sizeof(args[0]));
> > + }
> > +
> > if (i == 0) {
> > args[0] = regs->orig_a0;
> > args++;
> > i++;
> > n--;
> > }
> > - memcpy(args, &regs->a1 + i * sizeof(regs->a1), n * sizeof(args[0]));
> > +
> > + memcpy(args, &regs->a0 + i, n * sizeof(args[0]));
> > }
> >
> > static inline void syscall_set_arguments(struct task_struct *task,
> > @@ -90,14 +103,23 @@ static inline void syscall_set_arguments(struct task_struct *task,
> > unsigned int i, unsigned int n,
> > const unsigned long *args)
> > {
> > - BUG_ON(i + n > 6);
> > - if (i == 0) {
> > - regs->orig_a0 = args[0];
> > - args++;
> > - i++;
> > - n--;
> > - }
> > - memcpy(&regs->a1 + i * sizeof(regs->a1), args, n * sizeof(regs->a0));
> > + if (n == 0)
> > + return;
> > +
> > + if (i + n > SYSCALL_MAX_ARGS) {
> > + pr_warning("%s called with max args %d, handling only %d\n",
> > + __func__, i + n, SYSCALL_MAX_ARGS);
> > + n = SYSCALL_MAX_ARGS - i;
> > + }
> > +
> > + if (i == 0) {
> > + regs->orig_a0 = args[0];
> > + args++;
> > + i++;
> > + n--;
> > + }
> > +
> > + memcpy(&regs->a0 + i, args, n * sizeof(args[0]));
> > }
> >
> > static inline int syscall_get_arch(void)
> > --
> > 2.19.2
> >
>
>
> --
> Kees Cook

2018-12-06 18:27:39

by David Abdurachmanov

[permalink] [raw]
Subject: Re: [PATCH 1/2] riscv: add support for SECCOMP incl. filters

On Thu, Dec 6, 2018 at 5:52 PM Kees Cook <[email protected]> wrote:
>
> On Thu, Dec 6, 2018 at 7:02 AM David Abdurachmanov
> <[email protected]> wrote:
> > The patch adds support for SECCOMP and SECCOMP_FILTER (BPF).
>
> Can you add support to tools/testing/selftests/seccomp/seccomp_bpf.c
> as well? That selftest finds a lot of weird corner-cases...

I hate it locally and will include in v2.

The results see fine (tested in QEMU).

TAP version 13
selftests: seccomp: seccomp_bpf
========================================
[==========] Running 64 tests from 1 test cases.
[ RUN ] global.mode_strict_support
[ OK ] global.mode_strict_support
[ RUN ] global.mode_strict_cannot_call_prctl
[ OK ] global.mode_strict_cannot_call_prctl
[ RUN ] global.no_new_privs_support
[ OK ] global.no_new_privs_support
[ RUN ] global.mode_filter_support
[ OK ] global.mode_filter_support
[ RUN ] global.mode_filter_without_nnp
[ OK ] global.mode_filter_without_nnp
[ RUN ] global.filter_size_limits
[ OK ] global.filter_size_limits
[ RUN ] global.filter_chain_limits
[ OK ] global.filter_chain_limits
[ RUN ] global.mode_filter_cannot_move_to_strict
[ OK ] global.mode_filter_cannot_move_to_strict
[ RUN ] global.mode_filter_get_seccomp
[ OK ] global.mode_filter_get_seccomp
[ RUN ] global.ALLOW_all
[ OK ] global.ALLOW_all
[ RUN ] global.empty_prog
[ OK ] global.empty_prog
[ RUN ] global.log_all
[ OK ] global.log_all
[ RUN ] global.unknown_ret_is_kill_inside
[ OK ] global.unknown_ret_is_kill_inside
[ RUN ] global.unknown_ret_is_kill_above_allow
[ OK ] global.unknown_ret_is_kill_above_allow
[ RUN ] global.KILL_all
[ OK ] global.KILL_all
[ RUN ] global.KILL_one
[ OK ] global.KILL_one
[ RUN ] global.KILL_one_arg_one
[ OK ] global.KILL_one_arg_one
[ RUN ] global.KILL_one_arg_six
[ OK ] global.KILL_one_arg_six
[ RUN ] global.KILL_thread
[ OK ] global.KILL_thread
[ RUN ] global.KILL_process
[ OK ] global.KILL_process
[ RUN ] global.arg_out_of_range
[ OK ] global.arg_out_of_range
[ RUN ] global.ERRNO_valid
[ OK ] global.ERRNO_valid
[ RUN ] global.ERRNO_zero
[ OK ] global.ERRNO_zero
[ RUN ] global.ERRNO_capped
[ OK ] global.ERRNO_capped
[ RUN ] global.ERRNO_order
[ OK ] global.ERRNO_order
[ RUN ] TRAP.dfl
[ OK ] TRAP.dfl
[ RUN ] TRAP.ign
[ OK ] TRAP.ign
[ RUN ] TRAP.handler
[ OK ] TRAP.handler
[ RUN ] precedence.allow_ok
[ OK ] precedence.allow_ok
[ RUN ] precedence.kill_is_highest
[ OK ] precedence.kill_is_highest
[ RUN ] precedence.kill_is_highest_in_any_order
[ OK ] precedence.kill_is_highest_in_any_order
[ RUN ] precedence.trap_is_second
[ OK ] precedence.trap_is_second
[ RUN ] precedence.trap_is_second_in_any_order
[ OK ] precedence.trap_is_second_in_any_order
[ RUN ] precedence.errno_is_third
[ OK ] precedence.errno_is_third
[ RUN ] precedence.errno_is_third_in_any_order
[ OK ] precedence.errno_is_third_in_any_order
[ RUN ] precedence.trace_is_fourth
[ OK ] precedence.trace_is_fourth
[ RUN ] precedence.trace_is_fourth_in_any_order
[ OK ] precedence.trace_is_fourth_in_any_order
[ RUN ] precedence.log_is_fifth
[ OK ] precedence.log_is_fifth
[ RUN ] precedence.log_is_fifth_in_any_order
[ OK ] precedence.log_is_fifth_in_any_order
[ RUN ] TRACE_poke.read_has_side_effects
[ OK ] TRACE_poke.read_has_side_effects
[ RUN ] TRACE_poke.getpid_runs_normally
[ OK ] TRACE_poke.getpid_runs_normally
[ RUN ] TRACE_syscall.ptrace_syscall_redirected
[ OK ] TRACE_syscall.ptrace_syscall_redirected
[ RUN ] TRACE_syscall.ptrace_syscall_dropped
[ OK ] TRACE_syscall.ptrace_syscall_dropped
[ RUN ] TRACE_syscall.syscall_allowed
[ OK ] TRACE_syscall.syscall_allowed
[ RUN ] TRACE_syscall.syscall_redirected
[ OK ] TRACE_syscall.syscall_redirected
[ RUN ] TRACE_syscall.syscall_dropped
[ OK ] TRACE_syscall.syscall_dropped
[ RUN ] TRACE_syscall.skip_after_RET_TRACE
[ OK ] TRACE_syscall.skip_after_RET_TRACE
[ RUN ] TRACE_syscall.kill_after_RET_TRACE
[ OK ] TRACE_syscall.kill_after_RET_TRACE
[ RUN ] TRACE_syscall.skip_after_ptrace
[ OK ] TRACE_syscall.skip_after_ptrace
[ RUN ] TRACE_syscall.kill_after_ptrace
[ OK ] TRACE_syscall.kill_after_ptrace
[ RUN ] global.seccomp_syscall
[ OK ] global.seccomp_syscall
[ RUN ] global.seccomp_syscall_mode_lock
[ OK ] global.seccomp_syscall_mode_lock
[ RUN ] global.detect_seccomp_filter_flags
[ OK ] global.detect_seccomp_filter_flags
[ RUN ] global.TSYNC_first
[ OK ] global.TSYNC_first
[ RUN ] TSYNC.siblings_fail_prctl
[ OK ] TSYNC.siblings_fail_prctl
[ RUN ] TSYNC.two_siblings_with_ancestor
[ OK ] TSYNC.two_siblings_with_ancestor
[ RUN ] TSYNC.two_sibling_want_nnp
[ OK ] TSYNC.two_sibling_want_nnp
[ RUN ] TSYNC.two_siblings_with_no_filter
[ OK ] TSYNC.two_siblings_with_no_filter
[ RUN ] TSYNC.two_siblings_with_one_divergence
[ OK ] TSYNC.two_siblings_with_one_divergence
[ RUN ] TSYNC.two_siblings_not_under_filter
[ OK ] TSYNC.two_siblings_not_under_filter
[ RUN ] global.syscall_restart
[ OK ] global.syscall_restart
[ RUN ] global.filter_flag_log
[ OK ] global.filter_flag_log
[ RUN ] global.get_action_avail
[ OK ] global.get_action_avail
[ RUN ] global.get_metadata
[ OK ] global.get_metadata
[==========] 64 / 64 tests passed.
[ PASSED ]
ok 1..1 selftests: seccomp: seccomp_bpf [PASS]
selftests: seccomp: seccomp_benchmark
========================================
Calibrating reasonable sample size...
1544120467.383132905 - 1544120467.382814604 = 318301
1544120467.384111505 - 1544120467.383931405 = 180100
1544120467.385728706 - 1544120467.384510905 = 1217801
1544120467.386858006 - 1544120467.386096506 = 761500
1544120467.388563407 - 1544120467.387171006 = 1392401
1544120467.392465908 - 1544120467.390143107 = 2322801
1544120467.397988410 - 1544120467.393666109 = 4322301
1544120467.406494614 - 1544120467.398347511 = 8147103
1544120467.427372522 - 1544120467.406955414 = 20417108
1544120467.467600338 - 1544120467.427772222 = 39828116
1544120467.542484667 - 1544120467.467954738 = 74529929
1544120467.693806026 - 1544120467.543004867 = 150801159
1544120467.970921334 - 1544120467.694244026 = 276677308
1544120468.522149049 - 1544120467.971549534 = 550599515
1544120469.637696984 - 1544120468.522606749 = 1115090235
1544120471.829467338 - 1544120469.638147084 = 2191320254
1544120476.263601568 - 1544120471.829850239 = 4433751329
1544120485.135465027 - 1544120476.263980268 = 8871484759
Benchmarking 4194304 samples...
26.716000000 - 17.812000000 = 8904000000
getpid native: 2122 ns
46.548000000 - 26.716000000 = 19832000000
getpid RET_ALLOW: 4728 ns
Estimated seccomp overhead per syscall: 2606 ns
ok 1..2 selftests: seccomp: seccomp_benchmark [PASS]

>
> > diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
> > index 1c9cc8389928..1fd6e4130cab 100644
> > --- a/arch/riscv/include/asm/thread_info.h
> > +++ b/arch/riscv/include/asm/thread_info.h
> > @@ -81,6 +81,7 @@ struct thread_info {
> > #define TIF_MEMDIE 5 /* is terminating due to OOM killer */
> > #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */
> > #define TIF_SYSCALL_AUDIT 7 /* syscall auditing */
> > +#define TIF_SECCOMP 8 /* syscall secure computing */
>
> Nit: extra tab needs to be removed.
>
> --
> Kees Cook

2018-12-06 18:34:32

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH 1/2] riscv: add support for SECCOMP incl. filters

On Thu, Dec 6, 2018 at 10:26 AM David Abdurachmanov
<[email protected]> wrote:
>
> On Thu, Dec 6, 2018 at 5:52 PM Kees Cook <[email protected]> wrote:
> >
> > On Thu, Dec 6, 2018 at 7:02 AM David Abdurachmanov
> > <[email protected]> wrote:
> > > The patch adds support for SECCOMP and SECCOMP_FILTER (BPF).
> >
> > Can you add support to tools/testing/selftests/seccomp/seccomp_bpf.c
> > as well? That selftest finds a lot of weird corner-cases...
>
> I hate it locally and will include in v2.

I hate it too. ;) But seriously (reading through the "have" typo),
that's awesome. Thanks!

> The results see fine (tested in QEMU).

Great!

--
Kees Cook