2013-04-26 07:52:49

by Xi Wang

[permalink] [raw]
Subject: [RFC PATCH net-next 0/6] seccomp filter JIT

This patchset brings JIT support to seccomp filters for x86_64 and ARM.
It is against the net-next tree.

The current BPF JIT interface only accepts sk_filter, not seccomp_filter.
Patch 1/6 refactors the interface to make it more general.

With the refactored interface, patches 2/6 and 3/6 implement the seccomp
BPF_S_ANC_SECCOMP_LD_W instruction in x86 & ARM JIT.

Status:

* x86_64 & ARM: JIT tested with seccomp examples.

* powerpc [4/6]: no seccomp change - compile checked.

* sparc [5/6] & s390 [6/6]: no seccomp change - untested.

Sorry I have no sparc or s390 build environment here. Can someone help
check 5/6 and 6/6? Thanks.

Xi Wang (6):
filter: refactor BPF JIT for seccomp filters
x86: bpf_jit_comp: support BPF_S_ANC_SECCOMP_LD_W instruction
ARM: net: bpf_jit_32: support BPF_S_ANC_SECCOMP_LD_W instruction
PPC: net: bpf_jit_comp: refactor the BPF JIT interface
sparc: bpf_jit_comp: refactor the BPF JIT interface
s390/bpf,jit: refactor the BPF JIT interface

arch/arm/net/bpf_jit_32.c | 64 +++++++++++++++++++++++++----------------
arch/powerpc/net/bpf_jit_comp.c | 36 +++++++++++------------
arch/s390/net/bpf_jit_comp.c | 31 ++++++++++----------
arch/sparc/net/bpf_jit_comp.c | 22 +++++++-------
arch/x86/net/bpf_jit_comp.c | 38 ++++++++++++++++--------
include/linux/filter.h | 16 +++++++----
kernel/seccomp.c | 6 +++-
net/core/filter.c | 6 ++--
8 files changed, 127 insertions(+), 92 deletions(-)

--
1.8.1.2


2013-04-26 07:52:55

by Xi Wang

[permalink] [raw]
Subject: [RFC PATCH net-next 2/6] x86: bpf_jit_comp: support BPF_S_ANC_SECCOMP_LD_W instruction

This patch implements the seccomp BPF_S_ANC_SECCOMP_LD_W instruction
in x86 JIT.

Signed-off-by: Xi Wang <[email protected]>
---
arch/x86/net/bpf_jit_comp.c | 38 ++++++++++++++++++++++++++------------
1 file changed, 26 insertions(+), 12 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index f66b540..03c9c81 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -8,10 +8,11 @@
* of the License.
*/
#include <linux/moduleloader.h>
-#include <asm/cacheflush.h>
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
+#include <asm/cacheflush.h>
+#include <asm/syscall.h>

/*
* Conventions :
@@ -144,7 +145,7 @@ static int pkt_type_offset(void)
return -1;
}

-void bpf_jit_compile(struct sk_filter *fp)
+bpf_func_t bpf_jit_compile(struct sock_filter *filter, unsigned int flen)
{
u8 temp[64];
u8 *prog;
@@ -157,15 +158,14 @@ void bpf_jit_compile(struct sk_filter *fp)
int pc_ret0 = -1; /* bpf index of first RET #0 instruction (if any) */
unsigned int cleanup_addr; /* epilogue code offset */
unsigned int *addrs;
- const struct sock_filter *filter = fp->insns;
- int flen = fp->len;
+ bpf_func_t bpf_func = sk_run_filter;

if (!bpf_jit_enable)
- return;
+ return bpf_func;

addrs = kmalloc(flen * sizeof(*addrs), GFP_KERNEL);
if (addrs == NULL)
- return;
+ return bpf_func;

/* Before first pass, make a rough estimation of addrs[]
* each bpf instruction is translated to less than 64 bytes
@@ -684,6 +684,20 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i];
}
EMIT_COND_JMP(f_op, f_offset);
break;
+#ifdef CONFIG_SECCOMP_FILTER
+ case BPF_S_ANC_SECCOMP_LD_W:
+ if (K == offsetof(struct seccomp_data, arch)) {
+ int arch = syscall_get_arch(current, NULL);
+
+ EMIT1_off32(0xb8, arch); /* mov arch,%eax */
+ break;
+ }
+ func = (u8 *)seccomp_bpf_load;
+ t_offset = func - (image + addrs[i]);
+ EMIT1_off32(0xbf, K); /* mov imm32,%edi */
+ EMIT1_off32(0xe8, t_offset); /* call seccomp_bpf_load */
+ break;
+#endif
default:
/* hmm, too complex filter, give up with jit compiler */
goto out;
@@ -694,7 +708,7 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i];
pr_err("bpb_jit_compile fatal error\n");
kfree(addrs);
module_free(NULL, image);
- return;
+ return bpf_func;
}
memcpy(image + proglen, temp, ilen);
}
@@ -731,11 +745,11 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i];

if (image) {
bpf_flush_icache(image, image + proglen);
- fp->bpf_func = (void *)image;
+ bpf_func = (void *)image;
}
out:
kfree(addrs);
- return;
+ return bpf_func;
}

static void jit_free_defer(struct work_struct *arg)
@@ -746,10 +760,10 @@ static void jit_free_defer(struct work_struct *arg)
/* run from softirq, we must use a work_struct to call
* module_free() from process context
*/
-void bpf_jit_free(struct sk_filter *fp)
+void bpf_jit_free(bpf_func_t bpf_func)
{
- if (fp->bpf_func != sk_run_filter) {
- struct work_struct *work = (struct work_struct *)fp->bpf_func;
+ if (bpf_func != sk_run_filter) {
+ struct work_struct *work = (struct work_struct *)bpf_func;

INIT_WORK(work, jit_free_defer);
schedule_work(work);
--
1.8.1.2

2013-04-26 07:53:14

by Xi Wang

[permalink] [raw]
Subject: [RFC PATCH net-next 6/6] s390/bpf,jit: refactor the BPF JIT interface

Implement the refactored bpf_jit_compile() and bpf_jit_free().

Signed-off-by: Xi Wang <[email protected]>
---
arch/s390/net/bpf_jit_comp.c | 31 ++++++++++++++++---------------
1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 0972e91..7966e0c 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -738,19 +738,19 @@ out:
return -1;
}

-void bpf_jit_compile(struct sk_filter *fp)
+bpf_func_t bpf_jit_compile(struct sock_filter *filter, unsigned int flen)
{
unsigned long size, prg_len, lit_len;
struct bpf_jit jit, cjit;
unsigned int *addrs;
int pass, i;
+ bpf_func_t bpf_func = sk_run_filter;

if (!bpf_jit_enable)
- return;
- addrs = kmalloc(fp->len * sizeof(*addrs), GFP_KERNEL);
+ return bpf_func;
+ addrs = kzalloc(flen * sizeof(*addrs), GFP_KERNEL);
if (addrs == NULL)
- return;
- memset(addrs, 0, fp->len * sizeof(*addrs));
+ return bpf_func;
memset(&jit, 0, sizeof(cjit));
memset(&cjit, 0, sizeof(cjit));

@@ -759,10 +759,10 @@ void bpf_jit_compile(struct sk_filter *fp)
jit.lit = jit.mid;

bpf_jit_prologue(&jit);
- bpf_jit_noleaks(&jit, fp->insns);
- for (i = 0; i < fp->len; i++) {
- if (bpf_jit_insn(&jit, fp->insns + i, addrs, i,
- i == fp->len - 1))
+ bpf_jit_noleaks(&jit, filter);
+ for (i = 0; i < flen; i++) {
+ if (bpf_jit_insn(&jit, filter + i, addrs, i,
+ i == flen - 1))
goto out;
}
bpf_jit_epilogue(&jit);
@@ -789,8 +789,8 @@ void bpf_jit_compile(struct sk_filter *fp)
cjit = jit;
}
if (bpf_jit_enable > 1) {
- pr_err("flen=%d proglen=%lu pass=%d image=%p\n",
- fp->len, jit.end - jit.start, pass, jit.start);
+ pr_err("flen=%u proglen=%lu pass=%d image=%p\n",
+ flen, jit.end - jit.start, pass, jit.start);
if (jit.start) {
printk(KERN_ERR "JIT code:\n");
print_fn_code(jit.start, jit.mid - jit.start);
@@ -800,9 +800,10 @@ void bpf_jit_compile(struct sk_filter *fp)
}
}
if (jit.start)
- fp->bpf_func = (void *) jit.start;
+ bpf_func = (void *) jit.start;
out:
kfree(addrs);
+ return bpf_func;
}

static void jit_free_defer(struct work_struct *arg)
@@ -813,13 +814,13 @@ static void jit_free_defer(struct work_struct *arg)
/* run from softirq, we must use a work_struct to call
* module_free() from process context
*/
-void bpf_jit_free(struct sk_filter *fp)
+void bpf_jit_free(bpf_func_t bpf_func)
{
struct work_struct *work;

- if (fp->bpf_func == sk_run_filter)
+ if (bpf_func == sk_run_filter)
return;
- work = (struct work_struct *)fp->bpf_func;
+ work = (struct work_struct *)bpf_func;
INIT_WORK(work, jit_free_defer);
schedule_work(work);
}
--
1.8.1.2

2013-04-26 07:52:53

by Xi Wang

[permalink] [raw]
Subject: [RFC PATCH net-next 3/6] ARM: net: bpf_jit_32: support BPF_S_ANC_SECCOMP_LD_W instruction

This patch implements the seccomp BPF_S_ANC_SECCOMP_LD_W instruction
in ARM JIT.

Signed-off-by: Xi Wang <[email protected]>
---
arch/arm/net/bpf_jit_32.c | 64 +++++++++++++++++++++++++++++------------------
1 file changed, 39 insertions(+), 25 deletions(-)

diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 1a643ee..9bfce464 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -19,6 +19,7 @@
#include <linux/if_vlan.h>
#include <asm/cacheflush.h>
#include <asm/hwcap.h>
+#include <asm/syscall.h>

#include "bpf_jit_32.h"

@@ -55,7 +56,8 @@
#define FLAG_NEED_X_RESET (1 << 0)

struct jit_ctx {
- const struct sk_filter *skf;
+ struct sock_filter *insns;
+ unsigned len;
unsigned idx;
unsigned prologue_bytes;
int ret0_fp_idx;
@@ -131,8 +133,8 @@ static u16 saved_regs(struct jit_ctx *ctx)
{
u16 ret = 0;

- if ((ctx->skf->len > 1) ||
- (ctx->skf->insns[0].code == BPF_S_RET_A))
+ if ((ctx->len > 1) ||
+ (ctx->insns[0].code == BPF_S_RET_A))
ret |= 1 << r_A;

#ifdef CONFIG_FRAME_POINTER
@@ -181,7 +183,7 @@ static inline bool is_load_to_a(u16 inst)
static void build_prologue(struct jit_ctx *ctx)
{
u16 reg_set = saved_regs(ctx);
- u16 first_inst = ctx->skf->insns[0].code;
+ u16 first_inst = ctx->insns[0].code;
u16 off;

#ifdef CONFIG_FRAME_POINTER
@@ -279,7 +281,7 @@ static u16 imm_offset(u32 k, struct jit_ctx *ctx)
ctx->imms[i] = k;

/* constants go just after the epilogue */
- offset = ctx->offsets[ctx->skf->len];
+ offset = ctx->offsets[ctx->len];
offset += ctx->prologue_bytes;
offset += ctx->epilogue_bytes;
offset += i * 4;
@@ -419,7 +421,7 @@ static inline void emit_err_ret(u8 cond, struct jit_ctx *ctx)
emit(ARM_MOV_R(ARM_R0, ARM_R0), ctx);
} else {
_emit(cond, ARM_MOV_I(ARM_R0, 0), ctx);
- _emit(cond, ARM_B(b_imm(ctx->skf->len, ctx)), ctx);
+ _emit(cond, ARM_B(b_imm(ctx->len, ctx)), ctx);
}
}

@@ -469,14 +471,13 @@ static inline void update_on_xread(struct jit_ctx *ctx)
static int build_body(struct jit_ctx *ctx)
{
void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w};
- const struct sk_filter *prog = ctx->skf;
const struct sock_filter *inst;
unsigned i, load_order, off, condt;
int imm12;
u32 k;

- for (i = 0; i < prog->len; i++) {
- inst = &(prog->insns[i]);
+ for (i = 0; i < ctx->len; i++) {
+ inst = &(ctx->insns[i]);
/* K as an immediate value operand */
k = inst->k;

@@ -769,8 +770,8 @@ cmp_x:
ctx->ret0_fp_idx = i;
emit_mov_i(ARM_R0, k, ctx);
b_epilogue:
- if (i != ctx->skf->len - 1)
- emit(ARM_B(b_imm(prog->len, ctx)), ctx);
+ if (i != ctx->len - 1)
+ emit(ARM_B(b_imm(ctx->len, ctx)), ctx);
break;
case BPF_S_MISC_TAX:
/* X = A */
@@ -845,6 +846,19 @@ b_epilogue:
off = offsetof(struct sk_buff, queue_mapping);
emit(ARM_LDRH_I(r_A, r_skb, off), ctx);
break;
+#ifdef CONFIG_SECCOMP_FILTER
+ case BPF_S_ANC_SECCOMP_LD_W:
+ if (k == offsetof(struct seccomp_data, arch)) {
+ emit_mov_i(r_A, AUDIT_ARCH_ARM, ctx);
+ break;
+ }
+ ctx->seen |= SEEN_CALL;
+ emit_mov_i(ARM_R3, (u32)seccomp_bpf_load, ctx);
+ emit_mov_i(ARM_R0, k, ctx);
+ emit_blx_r(ARM_R3, ctx);
+ emit(ARM_MOV_R(r_A, ARM_R0), ctx);
+ break;
+#endif
default:
return -1;
}
@@ -858,22 +872,24 @@ b_epilogue:
}


-void bpf_jit_compile(struct sk_filter *fp)
+bpf_func_t bpf_jit_compile(struct sock_filter *filter, unsigned int flen)
{
struct jit_ctx ctx;
unsigned tmp_idx;
unsigned alloc_size;
+ bpf_func_t bpf_func = sk_run_filter;

if (!bpf_jit_enable)
- return;
+ return bpf_func;

memset(&ctx, 0, sizeof(ctx));
- ctx.skf = fp;
- ctx.ret0_fp_idx = -1;
+ ctx.insns = filter;
+ ctx.len = flen;
+ ctx.ret0_fp_idx = -1;

- ctx.offsets = kzalloc(4 * (ctx.skf->len + 1), GFP_KERNEL);
+ ctx.offsets = kzalloc(4 * (ctx.len + 1), GFP_KERNEL);
if (ctx.offsets == NULL)
- return;
+ return bpf_func;

/* fake pass to fill in the ctx->seen */
if (unlikely(build_body(&ctx)))
@@ -919,12 +935,12 @@ void bpf_jit_compile(struct sk_filter *fp)

if (bpf_jit_enable > 1)
/* there are 2 passes here */
- bpf_jit_dump(fp->len, alloc_size, 2, ctx.target);
+ bpf_jit_dump(ctx.len, alloc_size, 2, ctx.target);

- fp->bpf_func = (void *)ctx.target;
+ bpf_func = (void *)ctx.target;
out:
kfree(ctx.offsets);
- return;
+ return bpf_func;
}

static void bpf_jit_free_worker(struct work_struct *work)
@@ -932,12 +948,10 @@ static void bpf_jit_free_worker(struct work_struct *work)
module_free(NULL, work);
}

-void bpf_jit_free(struct sk_filter *fp)
+void bpf_jit_free(bpf_func_t bpf_func)
{
- struct work_struct *work;
-
- if (fp->bpf_func != sk_run_filter) {
- work = (struct work_struct *)fp->bpf_func;
+ if (bpf_func != sk_run_filter) {
+ struct work_struct *work = (struct work_struct *)bpf_func;

INIT_WORK(work, bpf_jit_free_worker);
schedule_work(work);
--
1.8.1.2

2013-04-26 07:53:48

by Xi Wang

[permalink] [raw]
Subject: [RFC PATCH net-next 5/6] sparc: bpf_jit_comp: refactor the BPF JIT interface

Implement the refactored bpf_jit_compile() and bpf_jit_free().

Signed-off-by: Xi Wang <[email protected]>
---
arch/sparc/net/bpf_jit_comp.c | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c
index d36a85e..15e6513 100644
--- a/arch/sparc/net/bpf_jit_comp.c
+++ b/arch/sparc/net/bpf_jit_comp.c
@@ -354,21 +354,21 @@ do { *prog++ = BR_OPC | WDISP22(OFF); \
* emit_jump() calls with adjusted offsets.
*/

-void bpf_jit_compile(struct sk_filter *fp)
+bpf_func_t bpf_jit_compile(struct sock_filter *filter, unsigned int flen)
{
unsigned int cleanup_addr, proglen, oldproglen = 0;
u32 temp[8], *prog, *func, seen = 0, pass;
- const struct sock_filter *filter = fp->insns;
- int i, flen = fp->len, pc_ret0 = -1;
+ int i, pc_ret0 = -1;
unsigned int *addrs;
void *image;
+ bpf_func_t bpf_func = sk_run_filter;

if (!bpf_jit_enable)
- return;
+ return bpf_func;

addrs = kmalloc(flen * sizeof(*addrs), GFP_KERNEL);
if (addrs == NULL)
- return;
+ return bpf_func;

/* Before first pass, make a rough estimation of addrs[]
* each bpf instruction is translated to less than 64 bytes
@@ -763,7 +763,7 @@ cond_branch: f_offset = addrs[i + filter[i].jf];
pr_err("bpb_jit_compile fatal error\n");
kfree(addrs);
module_free(NULL, image);
- return;
+ return bpf_func;
}
memcpy(image + proglen, temp, ilen);
}
@@ -799,11 +799,11 @@ cond_branch: f_offset = addrs[i + filter[i].jf];

if (image) {
bpf_flush_icache(image, image + proglen);
- fp->bpf_func = (void *)image;
+ bpf_func = (void *)image;
}
out:
kfree(addrs);
- return;
+ return bpf_func;
}

static void jit_free_defer(struct work_struct *arg)
@@ -814,10 +814,10 @@ static void jit_free_defer(struct work_struct *arg)
/* run from softirq, we must use a work_struct to call
* module_free() from process context
*/
-void bpf_jit_free(struct sk_filter *fp)
+void bpf_jit_free(bpf_func_t bpf_func)
{
- if (fp->bpf_func != sk_run_filter) {
- struct work_struct *work = (struct work_struct *)fp->bpf_func;
+ if (bpf_func != sk_run_filter) {
+ struct work_struct *work = (struct work_struct *)bpf_func;

INIT_WORK(work, jit_free_defer);
schedule_work(work);
--
1.8.1.2

2013-04-26 07:54:11

by Xi Wang

[permalink] [raw]
Subject: [RFC PATCH net-next 4/6] PPC: net: bpf_jit_comp: refactor the BPF JIT interface

Implement the refactored bpf_jit_compile() and bpf_jit_free().

Signed-off-by: Xi Wang <[email protected]>
---
arch/powerpc/net/bpf_jit_comp.c | 36 ++++++++++++++++++------------------
1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index c427ae3..a82e400 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -31,11 +31,11 @@ static inline void bpf_flush_icache(void *start, void *end)
flush_icache_range((unsigned long)start, (unsigned long)end);
}

-static void bpf_jit_build_prologue(struct sk_filter *fp, u32 *image,
+static void bpf_jit_build_prologue(struct sock_filter *filter,
+ u32 *image,
struct codegen_context *ctx)
{
int i;
- const struct sock_filter *filter = fp->insns;

if (ctx->seen & (SEEN_MEM | SEEN_DATAREF)) {
/* Make stackframe */
@@ -135,12 +135,12 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)

/* Assemble the body code between the prologue & epilogue. */
-static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
+static int bpf_jit_build_body(struct sock_filter *filter,
+ unsigned int flen,
+ u32 *image,
struct codegen_context *ctx,
unsigned int *addrs)
{
- const struct sock_filter *filter = fp->insns;
- int flen = fp->len;
u8 *func;
unsigned int true_cond;
int i;
@@ -564,7 +564,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
return 0;
}

-void bpf_jit_compile(struct sk_filter *fp)
+bpf_func_t bpf_jit_compile(struct sock_filter *filter, unsigned int flen)
{
unsigned int proglen;
unsigned int alloclen;
@@ -573,14 +573,14 @@ void bpf_jit_compile(struct sk_filter *fp)
unsigned int *addrs;
struct codegen_context cgctx;
int pass;
- int flen = fp->len;
+ bpf_func_t bpf_func = sk_run_filter;

if (!bpf_jit_enable)
- return;
+ return bpf_func;

addrs = kzalloc((flen+1) * sizeof(*addrs), GFP_KERNEL);
if (addrs == NULL)
- return;
+ return bpf_func;

/*
* There are multiple assembly passes as the generated code will change
@@ -636,7 +636,7 @@ void bpf_jit_compile(struct sk_filter *fp)
cgctx.seen = 0;
cgctx.pc_ret0 = -1;
/* Scouting faux-generate pass 0 */
- if (bpf_jit_build_body(fp, 0, &cgctx, addrs))
+ if (bpf_jit_build_body(filter, flen, 0, &cgctx, addrs))
/* We hit something illegal or unsupported. */
goto out;

@@ -645,7 +645,7 @@ void bpf_jit_compile(struct sk_filter *fp)
* update ctgtx.idx as it pretends to output instructions, then we can
* calculate total size from idx.
*/
- bpf_jit_build_prologue(fp, 0, &cgctx);
+ bpf_jit_build_prologue(filter, 0, &cgctx);
bpf_jit_build_epilogue(0, &cgctx);

proglen = cgctx.idx * 4;
@@ -661,8 +661,8 @@ void bpf_jit_compile(struct sk_filter *fp)
for (pass = 1; pass < 3; pass++) {
/* Now build the prologue, body code & epilogue for real. */
cgctx.idx = 0;
- bpf_jit_build_prologue(fp, code_base, &cgctx);
- bpf_jit_build_body(fp, code_base, &cgctx, addrs);
+ bpf_jit_build_prologue(filter, code_base, &cgctx);
+ bpf_jit_build_body(filter, flen, code_base, &cgctx, addrs);
bpf_jit_build_epilogue(code_base, &cgctx);

if (bpf_jit_enable > 1)
@@ -681,11 +681,11 @@ void bpf_jit_compile(struct sk_filter *fp)
/* Function descriptor nastiness: Address + TOC */
((u64 *)image)[0] = (u64)code_base;
((u64 *)image)[1] = local_paca->kernel_toc;
- fp->bpf_func = (void *)image;
+ bpf_func = (void *)image;
}
out:
kfree(addrs);
- return;
+ return bpf_func;
}

static void jit_free_defer(struct work_struct *arg)
@@ -696,10 +696,10 @@ static void jit_free_defer(struct work_struct *arg)
/* run from softirq, we must use a work_struct to call
* module_free() from process context
*/
-void bpf_jit_free(struct sk_filter *fp)
+void bpf_jit_free(bpf_func_t bpf_func)
{
- if (fp->bpf_func != sk_run_filter) {
- struct work_struct *work = (struct work_struct *)fp->bpf_func;
+ if (bpf_func != sk_run_filter) {
+ struct work_struct *work = (struct work_struct *)bpf_func;

INIT_WORK(work, jit_free_defer);
schedule_work(work);
--
1.8.1.2

2013-04-26 07:54:36

by Xi Wang

[permalink] [raw]
Subject: [RFC PATCH net-next 1/6] filter: refactor BPF JIT for seccomp filters

Currently, bpf_jit_compile() and bpf_jit_free() takes an sk_filter,
which seccomp filters cannot reuse.

Change bpf_jit_compile() to take a pointer to BPF instructions and
the length, and to return a JITted function.

Change bpf_jit_free() to take a JITted function.

Add JIT calls for seccomp filters.

Signed-off-by: Xi Wang <[email protected]>
---
include/linux/filter.h | 16 ++++++++++------
kernel/seccomp.c | 6 +++++-
net/core/filter.c | 6 ++----
3 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index d1248f4..8743093 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -21,12 +21,14 @@ struct compat_sock_fprog {
struct sk_buff;
struct sock;

+typedef unsigned int (*bpf_func_t)(const struct sk_buff *skb,
+ const struct sock_filter *filter);
+
struct sk_filter
{
atomic_t refcnt;
unsigned int len; /* Number of filter blocks */
- unsigned int (*bpf_func)(const struct sk_buff *skb,
- const struct sock_filter *filter);
+ bpf_func_t bpf_func;
struct rcu_head rcu;
struct sock_filter insns[0];
};
@@ -48,11 +50,12 @@ extern int sk_chk_filter(struct sock_filter *filter, unsigned int flen);
extern int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned len);

#ifdef CONFIG_BPF_JIT
+#include <stdarg.h>
#include <linux/linkage.h>
#include <linux/printk.h>

-extern void bpf_jit_compile(struct sk_filter *fp);
-extern void bpf_jit_free(struct sk_filter *fp);
+extern bpf_func_t bpf_jit_compile(struct sock_filter *filter, unsigned int flen);
+extern void bpf_jit_free(bpf_func_t bpf_func);

static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen,
u32 pass, void *image)
@@ -65,10 +68,11 @@ static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen,
}
#define SK_RUN_FILTER(FILTER, SKB) (*FILTER->bpf_func)(SKB, FILTER->insns)
#else
-static inline void bpf_jit_compile(struct sk_filter *fp)
+static inline bpf_func_t bpf_jit_compile(struct sock_filter *filter, unsigned int flen)
{
+ return sk_run_filter;
}
-static inline void bpf_jit_free(struct sk_filter *fp)
+static inline void bpf_jit_free(bpf_func_t bpf_func)
{
}
#define SK_RUN_FILTER(FILTER, SKB) sk_run_filter(SKB, FILTER->insns)
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 5af44b5..f784feb 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -55,6 +55,7 @@ struct seccomp_filter {
atomic_t usage;
struct seccomp_filter *prev;
unsigned short len; /* Instruction count */
+ bpf_func_t bpf_func;
struct sock_filter insns[];
};

@@ -211,7 +212,7 @@ static u32 seccomp_run_filters(int syscall)
* value always takes priority (ignoring the DATA).
*/
for (f = current->seccomp.filter; f; f = f->prev) {
- u32 cur_ret = sk_run_filter(NULL, f->insns);
+ u32 cur_ret = SK_RUN_FILTER(f, NULL);
if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
ret = cur_ret;
}
@@ -273,6 +274,8 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
if (ret)
goto fail;

+ filter->bpf_func = bpf_jit_compile(filter->insns, filter->len);
+
/*
* If there is an existing filter, make it the prev and don't drop its
* task reference.
@@ -330,6 +333,7 @@ void put_seccomp_filter(struct task_struct *tsk)
while (orig && atomic_dec_and_test(&orig->usage)) {
struct seccomp_filter *freeme = orig;
orig = orig->prev;
+ bpf_jit_free(freeme->bpf_func);
kfree(freeme);
}
}
diff --git a/net/core/filter.c b/net/core/filter.c
index dad2a17..0a7900b 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -643,7 +643,7 @@ void sk_filter_release_rcu(struct rcu_head *rcu)
{
struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);

- bpf_jit_free(fp);
+ bpf_jit_free(fp->bpf_func);
kfree(fp);
}
EXPORT_SYMBOL(sk_filter_release_rcu);
@@ -652,13 +652,11 @@ static int __sk_prepare_filter(struct sk_filter *fp)
{
int err;

- fp->bpf_func = sk_run_filter;
-
err = sk_chk_filter(fp->insns, fp->len);
if (err)
return err;

- bpf_jit_compile(fp);
+ fp->bpf_func = bpf_jit_compile(fp->insns, fp->len);
return 0;
}

--
1.8.1.2

2013-04-26 11:26:47

by Heiko Carstens

[permalink] [raw]
Subject: Re: [RFC PATCH net-next 0/6] seccomp filter JIT

On Fri, Apr 26, 2013 at 03:51:40AM -0400, Xi Wang wrote:
> This patchset brings JIT support to seccomp filters for x86_64 and ARM.
> It is against the net-next tree.
>
> The current BPF JIT interface only accepts sk_filter, not seccomp_filter.
> Patch 1/6 refactors the interface to make it more general.
>
> With the refactored interface, patches 2/6 and 3/6 implement the seccomp
> BPF_S_ANC_SECCOMP_LD_W instruction in x86 & ARM JIT.
>
> Status:
>
> * x86_64 & ARM: JIT tested with seccomp examples.
>
> * powerpc [4/6]: no seccomp change - compile checked.
>
> * sparc [5/6] & s390 [6/6]: no seccomp change - untested.
>
> Sorry I have no sparc or s390 build environment here. Can someone help
> check 5/6 and 6/6? Thanks.

Your patches are against which tree?
They don't apply on top of linux-next or Linus' linux tree.

Btw. are there any test cases around for BPF JIT?
Not only for the new seccomp but also netfilter?

2013-04-26 11:46:31

by Heiko Carstens

[permalink] [raw]
Subject: Re: [RFC PATCH net-next 0/6] seccomp filter JIT

On Fri, Apr 26, 2013 at 01:25:39PM +0200, Heiko Carstens wrote:
> On Fri, Apr 26, 2013 at 03:51:40AM -0400, Xi Wang wrote:
> > This patchset brings JIT support to seccomp filters for x86_64 and ARM.
> > It is against the net-next tree.
> >
> > The current BPF JIT interface only accepts sk_filter, not seccomp_filter.
> > Patch 1/6 refactors the interface to make it more general.
> >
> > With the refactored interface, patches 2/6 and 3/6 implement the seccomp
> > BPF_S_ANC_SECCOMP_LD_W instruction in x86 & ARM JIT.
> >
> > Status:
> >
> > * x86_64 & ARM: JIT tested with seccomp examples.
> >
> > * powerpc [4/6]: no seccomp change - compile checked.
> >
> > * sparc [5/6] & s390 [6/6]: no seccomp change - untested.
> >
> > Sorry I have no sparc or s390 build environment here. Can someone help
> > check 5/6 and 6/6? Thanks.
>
> Your patches are against which tree?
> They don't apply on top of linux-next or Linus' linux tree.

Right... just right the subject of each patch description says, they apply
against net-next.

And build fine on s390.

> Btw. are there any test cases around for BPF JIT?
> Not only for the new seccomp but also netfilter?

This however is still a valid question.

2013-04-26 11:46:58

by Daniel Borkmann

[permalink] [raw]
Subject: Re: [RFC PATCH net-next 0/6] seccomp filter JIT

On 04/26/2013 01:25 PM, Heiko Carstens wrote:
> On Fri, Apr 26, 2013 at 03:51:40AM -0400, Xi Wang wrote:
>> This patchset brings JIT support to seccomp filters for x86_64 and ARM.
>> It is against the net-next tree.
>>
>> The current BPF JIT interface only accepts sk_filter, not seccomp_filter.
>> Patch 1/6 refactors the interface to make it more general.
>>
>> With the refactored interface, patches 2/6 and 3/6 implement the seccomp
>> BPF_S_ANC_SECCOMP_LD_W instruction in x86 & ARM JIT.
>>
>> Status:
>>
>> * x86_64 & ARM: JIT tested with seccomp examples.
>>
>> * powerpc [4/6]: no seccomp change - compile checked.
>>
>> * sparc [5/6] & s390 [6/6]: no seccomp change - untested.
>>
>> Sorry I have no sparc or s390 build environment here. Can someone help
>> check 5/6 and 6/6? Thanks.
>
> Your patches are against which tree?
> They don't apply on top of linux-next or Linus' linux tree.

In the subject line it says net-next.

> Btw. are there any test cases around for BPF JIT?
> Not only for the new seccomp but also netfilter?

BPF is used in packet(7) sockets, not Netfilter. But yeah, test
cases for different archs would be really good, i.e. if seccomp
wants to use BPF JIT, where seccomp is used for user space program
sand-boxing (e.g. in Chromium), I can already guess that this will
put attention to certain people. :-)

I think BPF JIT for seccomp on ARM recently got applied to -mm tree
if I'm not mistaken. It was from Nicolas Schichan (cc):

http://thread.gmane.org/gmane.linux.ports.arm.kernel/233416/

2013-04-26 12:16:23

by Xi Wang

[permalink] [raw]
Subject: Re: [RFC PATCH net-next 0/6] seccomp filter JIT

On Fri, Apr 26, 2013 at 7:46 AM, Heiko Carstens
<[email protected]> wrote:
> And build fine on s390.

Thanks!

>> Btw. are there any test cases around for BPF JIT?
>> Not only for the new seccomp but also netfilter?
>
> This however is still a valid question.

Not sure about test cases for BPF JIT in general. I used the examples
under "samples/seccomp/" for testing; they are seccomp filters though.

- xi

2013-04-26 12:32:22

by Xi Wang

[permalink] [raw]
Subject: Re: [RFC PATCH net-next 0/6] seccomp filter JIT

On Fri, Apr 26, 2013 at 7:46 AM, Daniel Borkmann <[email protected]> wrote:
> I think BPF JIT for seccomp on ARM recently got applied to -mm tree
> if I'm not mistaken. It was from Nicolas Schichan (cc):
>
> http://thread.gmane.org/gmane.linux.ports.arm.kernel/233416/

Thanks for the pointer.

For the ARM part, looks like Nicolas's patch requires to implement two
wrappers for each arch:

void seccomp_jit_compile(struct seccomp_filter *fp);
void seccomp_jit_free(struct seccomp_filter *fp);

The implementation of these wrappers is almost identical to:

void bpf_jit_compile(struct sk_filter *fp);
void bpf_jit_free(struct sk_filter *fp);

While this patch uses a unified interface for both packet & seccomp filters.

bpf_func_t bpf_jit_compile(struct sock_filter *filter, unsigned int flen);
void bpf_jit_free(bpf_func_t bpf_func);

Shouldn't be hard to merge though.

- xi

2013-04-26 12:38:57

by Daniel Borkmann

[permalink] [raw]
Subject: Re: [RFC PATCH net-next 0/6] seccomp filter JIT

On 04/26/2013 02:31 PM, Xi Wang wrote:
> On Fri, Apr 26, 2013 at 7:46 AM, Daniel Borkmann <[email protected]> wrote:
>> I think BPF JIT for seccomp on ARM recently got applied to -mm tree
>> if I'm not mistaken. It was from Nicolas Schichan (cc):
>>
>> http://thread.gmane.org/gmane.linux.ports.arm.kernel/233416/
>
> Thanks for the pointer.
>
> For the ARM part, looks like Nicolas's patch requires to implement two
> wrappers for each arch:
>
> void seccomp_jit_compile(struct seccomp_filter *fp);
> void seccomp_jit_free(struct seccomp_filter *fp);
>
> The implementation of these wrappers is almost identical to:
>
> void bpf_jit_compile(struct sk_filter *fp);
> void bpf_jit_free(struct sk_filter *fp);
>
> While this patch uses a unified interface for both packet & seccomp filters.
>
> bpf_func_t bpf_jit_compile(struct sock_filter *filter, unsigned int flen);
> void bpf_jit_free(bpf_func_t bpf_func);

A unified interface seems more clean, imho.

> Shouldn't be hard to merge though.

2013-04-26 14:18:51

by Eric Dumazet

[permalink] [raw]
Subject: Re: [RFC PATCH net-next 2/6] x86: bpf_jit_comp: support BPF_S_ANC_SECCOMP_LD_W instruction

On Fri, 2013-04-26 at 03:51 -0400, Xi Wang wrote:

> +#ifdef CONFIG_SECCOMP_FILTER
> + case BPF_S_ANC_SECCOMP_LD_W:
> + if (K == offsetof(struct seccomp_data, arch)) {
> + int arch = syscall_get_arch(current, NULL);
> +
> + EMIT1_off32(0xb8, arch); /* mov arch,%eax */
> + break;
> + }
> + func = (u8 *)seccomp_bpf_load;
> + t_offset = func - (image + addrs[i]);
> + EMIT1_off32(0xbf, K); /* mov imm32,%edi */
> + EMIT1_off32(0xe8, t_offset); /* call seccomp_bpf_load */
> + break;
> +#endif

This seems seriously wrong to me.

This cannot have been tested at all.


2013-04-26 14:50:48

by Xi Wang

[permalink] [raw]
Subject: Re: [RFC PATCH net-next 2/6] x86: bpf_jit_comp: support BPF_S_ANC_SECCOMP_LD_W instruction

On Fri, Apr 26, 2013 at 10:18 AM, Eric Dumazet <[email protected]> wrote:
> On Fri, 2013-04-26 at 03:51 -0400, Xi Wang wrote:
>
>> +#ifdef CONFIG_SECCOMP_FILTER
>> + case BPF_S_ANC_SECCOMP_LD_W:
>> + if (K == offsetof(struct seccomp_data, arch)) {
>> + int arch = syscall_get_arch(current, NULL);
>> +
>> + EMIT1_off32(0xb8, arch); /* mov arch,%eax */
>> + break;
>> + }
>> + func = (u8 *)seccomp_bpf_load;
>> + t_offset = func - (image + addrs[i]);
>> + EMIT1_off32(0xbf, K); /* mov imm32,%edi */
>> + EMIT1_off32(0xe8, t_offset); /* call seccomp_bpf_load */
>> + break;
>> +#endif
>
> This seems seriously wrong to me.

Can you elaborate?

> This cannot have been tested at all.

Thanks to QEMU for hiding bugs then. :)

- xi

2013-04-26 15:11:38

by Eric Dumazet

[permalink] [raw]
Subject: Re: [RFC PATCH net-next 2/6] x86: bpf_jit_comp: support BPF_S_ANC_SECCOMP_LD_W instruction

On Fri, 2013-04-26 at 10:50 -0400, Xi Wang wrote:
> On Fri, Apr 26, 2013 at 10:18 AM, Eric Dumazet <[email protected]> wrote:
> > On Fri, 2013-04-26 at 03:51 -0400, Xi Wang wrote:
> >
> >> +#ifdef CONFIG_SECCOMP_FILTER
> >> + case BPF_S_ANC_SECCOMP_LD_W:
> >> + if (K == offsetof(struct seccomp_data, arch)) {
> >> + int arch = syscall_get_arch(current, NULL);
> >> +
> >> + EMIT1_off32(0xb8, arch); /* mov arch,%eax */
> >> + break;
> >> + }
> >> + func = (u8 *)seccomp_bpf_load;
> >> + t_offset = func - (image + addrs[i]);
> >> + EMIT1_off32(0xbf, K); /* mov imm32,%edi */
> >> + EMIT1_off32(0xe8, t_offset); /* call seccomp_bpf_load */
> >> + break;
> >> +#endif
> >
> > This seems seriously wrong to me.
>
> Can you elaborate?
>
> > This cannot have been tested at all.
>
> Thanks to QEMU for hiding bugs then. :)



1) 'current' at the time the code is jitted (compiled) is not the
'current' at the time the filter will be evaluated.

On x86_64, if CONFIG_IA32_EMULATION=y, syscall_get_arch() evaluates to :

if (task_thread_info(task)->status & TS_COMPAT)
return AUDIT_ARCH_I386;
return AUDIT_ARCH_X86_64;

So your code is completely wrong.

2) Calling a function potentially destroys some registers.
%rdi,%r8,%r9 for instance, so we are going to crash very easily.

I dont know, I feel a bit uncomfortable having to explain this to
someone sending security related patches...



2013-04-26 15:16:30

by David Laight

[permalink] [raw]
Subject: RE: [RFC PATCH net-next 2/6] x86: bpf_jit_comp: support BPF_S_ANC_SECCOMP_LD_W instruction

> >> +#ifdef CONFIG_SECCOMP_FILTER
> >> + case BPF_S_ANC_SECCOMP_LD_W:
> >> + if (K == offsetof(struct seccomp_data, arch)) {
> >> + int arch = syscall_get_arch(current, NULL);
> >> +
> >> + EMIT1_off32(0xb8, arch); /* mov arch,%eax */
> >> + break;
> >> + }
> >> + func = (u8 *)seccomp_bpf_load;
> >> + t_offset = func - (image + addrs[i]);
> >> + EMIT1_off32(0xbf, K); /* mov imm32,%edi */
> >> + EMIT1_off32(0xe8, t_offset); /* call seccomp_bpf_load */
> >> + break;
> >> +#endif
> >
> > This seems seriously wrong to me.
>
> Can you elaborate?

The 'call seccomp_bpf_load' needs a pc-relative offset,
I assume that is what EMIT1_off32() generates.

The other two instructions want an absolute 32 bit value...

David


2013-04-26 15:20:40

by Eric Dumazet

[permalink] [raw]
Subject: Re: [RFC PATCH net-next 1/6] filter: refactor BPF JIT for seccomp filters

On Fri, 2013-04-26 at 03:51 -0400, Xi Wang wrote:
> Currently, bpf_jit_compile() and bpf_jit_free() takes an sk_filter,
> which seccomp filters cannot reuse.
>
> Change bpf_jit_compile() to take a pointer to BPF instructions and
> the length, and to return a JITted function.
>
> Change bpf_jit_free() to take a JITted function.
>
> Add JIT calls for seccomp filters.
>
> Signed-off-by: Xi Wang <[email protected]>
> ---

When submitting a patch serie, full kernel must be fully compile-able
after each patch.

Thats mandatory to be able to perform git bisection in the future.

You cannot change the prototypes as you do in this patch, because it
breaks all the BPF JIT.

arch/x86/net/bpf_jit_comp.c:147:6: error: conflicting types for ‘bpf_jit_compile’
include/linux/filter.h:57:19: note: previous declaration of ‘bpf_jit_compile’ was here
arch/x86/net/bpf_jit_comp.c:749:6: error: conflicting types for ‘bpf_jit_free’
include/linux/filter.h:58:13: note: previous declaration of ‘bpf_jit_free’ was here
make[1]: *** [arch/x86/net/bpf_jit_comp.o] Error 1
make: *** [arch/x86/net/bpf_jit_comp.o] Error 2

2013-04-26 15:27:51

by Eric Dumazet

[permalink] [raw]
Subject: RE: [RFC PATCH net-next 2/6] x86: bpf_jit_comp: support BPF_S_ANC_SECCOMP_LD_W instruction

On Fri, 2013-04-26 at 16:15 +0100, David Laight wrote:
> > >> +#ifdef CONFIG_SECCOMP_FILTER
> > >> + case BPF_S_ANC_SECCOMP_LD_W:
> > >> + if (K == offsetof(struct seccomp_data, arch)) {
> > >> + int arch = syscall_get_arch(current, NULL);
> > >> +
> > >> + EMIT1_off32(0xb8, arch); /* mov arch,%eax */
> > >> + break;
> > >> + }
> > >> + func = (u8 *)seccomp_bpf_load;
> > >> + t_offset = func - (image + addrs[i]);
> > >> + EMIT1_off32(0xbf, K); /* mov imm32,%edi */
> > >> + EMIT1_off32(0xe8, t_offset); /* call seccomp_bpf_load */
> > >> + break;
> > >> +#endif
> > >
> > > This seems seriously wrong to me.
> >
> > Can you elaborate?
>
> The 'call seccomp_bpf_load' needs a pc-relative offset,
> I assume that is what EMIT1_off32() generates.
>
> The other two instructions want an absolute 32 bit value...

Hmm, this part is fine, we perform the relative adjustments in
t_offset = func - (image + addrs[i]);



2013-04-26 15:30:14

by Xi Wang

[permalink] [raw]
Subject: Re: [RFC PATCH net-next 2/6] x86: bpf_jit_comp: support BPF_S_ANC_SECCOMP_LD_W instruction

On Fri, Apr 26, 2013 at 11:11 AM, Eric Dumazet <[email protected]> wrote:
> 2) Calling a function potentially destroys some registers.
> %rdi,%r8,%r9 for instance, so we are going to crash very easily.
>
> I dont know, I feel a bit uncomfortable having to explain this to
> someone sending security related patches...

My old code did save these registers. But, do we really need that for
seccomp? For example, %rdi (skb) is always NULL and never used by
seccomp filters. Did I miss anything?

- xi

2013-04-26 15:40:05

by David Laight

[permalink] [raw]
Subject: RE: [RFC PATCH net-next 2/6] x86: bpf_jit_comp: support BPF_S_ANC_SECCOMP_LD_W instruction

> > > >> + func = (u8 *)seccomp_bpf_load;
> > > >> + t_offset = func - (image + addrs[i]);
> > > >> + EMIT1_off32(0xbf, K); /* mov imm32,%edi */
> > > >> + EMIT1_off32(0xe8, t_offset); /* call seccomp_bpf_load */
> > > >> + break;
> > > >> +#endif
> > > >
> > > > This seems seriously wrong to me.
> > >
> > > Can you elaborate?
> >
> > The 'call seccomp_bpf_load' needs a pc-relative offset,
> > I assume that is what EMIT1_off32() generates.
> >
> > The other two instructions want an absolute 32 bit value...
>
> Hmm, this part is fine, we perform the relative adjustments in
> t_offset = func - (image + addrs[i]);

The call needs the displacement from the address of
the instruction following the call.
I can't imagine any way in which above can allow for the 5 byte
'mov imm32,%edi' instruction.

I'd have thought there would be an EMIT1_imm32().
(I've written a lot of x86 asm in my days!)

David

????{.n?+???????+%?????ݶ??w??{.n?+????{??G?????{ay?ʇڙ?,j??f???h?????????z_??(?階?ݢj"???m??????G????????????&???~???iO???z??v?^?m???? ????????I?

2013-04-26 15:43:45

by Eric Dumazet

[permalink] [raw]
Subject: Re: [RFC PATCH net-next 2/6] x86: bpf_jit_comp: support BPF_S_ANC_SECCOMP_LD_W instruction

On Fri, 2013-04-26 at 11:29 -0400, Xi Wang wrote:
> On Fri, Apr 26, 2013 at 11:11 AM, Eric Dumazet <[email protected]> wrote:
> > 2) Calling a function potentially destroys some registers.
> > %rdi,%r8,%r9 for instance, so we are going to crash very easily.
> >
> > I dont know, I feel a bit uncomfortable having to explain this to
> > someone sending security related patches...
>
> My old code did save these registers. But, do we really need that for
> seccomp? For example, %rdi (skb) is always NULL and never used by
> seccomp filters. Did I miss anything?

I do not know.

This is not explained in your changelog or in any comment.

You have to make the full analysis yourself and make us comfortable with
the results.

You send patches and ask us to spend hours on it, this is not how it
works.


2013-04-26 15:46:29

by Eric Dumazet

[permalink] [raw]
Subject: RE: [RFC PATCH net-next 2/6] x86: bpf_jit_comp: support BPF_S_ANC_SECCOMP_LD_W instruction

On Fri, 2013-04-26 at 16:38 +0100, David Laight wrote:

> The call needs the displacement from the address of
> the instruction following the call.
> I can't imagine any way in which above can allow for the 5 byte
> 'mov imm32,%edi' instruction.
>
> I'd have thought there would be an EMIT1_imm32().
> (I've written a lot of x86 asm in my days!)
>

Thats the same.

off32 is a 32bit quantity, or immediate 32 if you prefer.


2013-04-26 15:58:28

by Xi Wang

[permalink] [raw]
Subject: Re: [RFC PATCH net-next 2/6] x86: bpf_jit_comp: support BPF_S_ANC_SECCOMP_LD_W instruction

On Fri, Apr 26, 2013 at 11:43 AM, Eric Dumazet <[email protected]> wrote:
> I do not know.
>
> This is not explained in your changelog or in any comment.
>
> You have to make the full analysis yourself and make us comfortable with
> the results.
>
> You send patches and ask us to spend hours on it, this is not how it
> works.

"do we really need that for seccomp?" is not asking. I just tried to
explain in a gentle way.

%rdi,%r8,%r9 are not used by seccomp filters so I removed the push/pop
part. I agree that I should explain the details in the code comments
or logs. Thanks for catching that.

- xi

2013-04-26 16:02:47

by Xi Wang

[permalink] [raw]
Subject: Re: [RFC PATCH net-next 2/6] x86: bpf_jit_comp: support BPF_S_ANC_SECCOMP_LD_W instruction

On Fri, Apr 26, 2013 at 11:11 AM, Eric Dumazet <[email protected]> wrote:
> 1) 'current' at the time the code is jitted (compiled) is not the
> 'current' at the time the filter will be evaluated.
>
> On x86_64, if CONFIG_IA32_EMULATION=y, syscall_get_arch() evaluates to :
>
> if (task_thread_info(task)->status & TS_COMPAT)
> return AUDIT_ARCH_I386;
> return AUDIT_ARCH_X86_64;
>
> So your code is completely wrong.

Just to be clear, are you worrying about a process changing its
personality after installing seccomp filters?

- xi

2013-04-26 16:14:05

by Eric Dumazet

[permalink] [raw]
Subject: Re: [RFC PATCH net-next 2/6] x86: bpf_jit_comp: support BPF_S_ANC_SECCOMP_LD_W instruction

On Fri, 2013-04-26 at 12:02 -0400, Xi Wang wrote:
> On Fri, Apr 26, 2013 at 11:11 AM, Eric Dumazet <[email protected]> wrote:
> > 1) 'current' at the time the code is jitted (compiled) is not the
> > 'current' at the time the filter will be evaluated.
> >
> > On x86_64, if CONFIG_IA32_EMULATION=y, syscall_get_arch() evaluates to :
> >
> > if (task_thread_info(task)->status & TS_COMPAT)
> > return AUDIT_ARCH_I386;
> > return AUDIT_ARCH_X86_64;
> >
> > So your code is completely wrong.
>
> Just to be clear, are you worrying about a process changing its
> personality after installing seccomp filters?

You didn't explained how things worked.

Are you assuming we network guys know everything ?

Just to make it very clear :

We are very dumb and you must explain us everything.

If process would not change personality, why do we have get_arch() at
all ? Why isn't it optimized outside of the JIT itself, in the generic
seccomp checker, its a single "A = K" instruction after all.

Why this part is even in the x86 BPF JIT ?

To me it looks like _if_ get_arch() is provided in BPF, its for a
reason, and your implementation looks very suspicious, if not buggy.


2013-04-26 18:26:33

by Xi Wang

[permalink] [raw]
Subject: Re: [RFC PATCH net-next 2/6] x86: bpf_jit_comp: support BPF_S_ANC_SECCOMP_LD_W instruction

Not sure how many you are speaking for when you say "We are very dumb". :)

Thanks for catching this. I'l remove this arch thing in v2.

To address your other concern about registers, I'll add some comments
to the code, something like:

"%rdi,%r8,%r9 are not used by seccomp filters; it's safe to not save them."

- xi

On Fri, Apr 26, 2013 at 12:14 PM, Eric Dumazet <[email protected]> wrote:
> On Fri, 2013-04-26 at 12:02 -0400, Xi Wang wrote:
>> On Fri, Apr 26, 2013 at 11:11 AM, Eric Dumazet <[email protected]> wrote:
>> > 1) 'current' at the time the code is jitted (compiled) is not the
>> > 'current' at the time the filter will be evaluated.
>> >
>> > On x86_64, if CONFIG_IA32_EMULATION=y, syscall_get_arch() evaluates to :
>> >
>> > if (task_thread_info(task)->status & TS_COMPAT)
>> > return AUDIT_ARCH_I386;
>> > return AUDIT_ARCH_X86_64;
>> >
>> > So your code is completely wrong.
>>
>> Just to be clear, are you worrying about a process changing its
>> personality after installing seccomp filters?
>
> You didn't explained how things worked.
>
> Are you assuming we network guys know everything ?
>
> Just to make it very clear :
>
> We are very dumb and you must explain us everything.
>
> If process would not change personality, why do we have get_arch() at
> all ? Why isn't it optimized outside of the JIT itself, in the generic
> seccomp checker, its a single "A = K" instruction after all.
>
> Why this part is even in the x86 BPF JIT ?
>
> To me it looks like _if_ get_arch() is provided in BPF, its for a
> reason, and your implementation looks very suspicious, if not buggy.
>
>
>

2013-04-26 18:40:11

by Eric Dumazet

[permalink] [raw]
Subject: Re: [RFC PATCH net-next 2/6] x86: bpf_jit_comp: support BPF_S_ANC_SECCOMP_LD_W instruction

On Fri, 2013-04-26 at 14:25 -0400, Xi Wang wrote:
> Not sure how many you are speaking for when you say "We are very dumb". :)
>
> Thanks for catching this. I'l remove this arch thing in v2.
>
> To address your other concern about registers, I'll add some comments
> to the code, something like:
>
> "%rdi,%r8,%r9 are not used by seccomp filters; it's safe to not save them."

OK good

BTW, most of us prefer Bottom posting on lkml/netdev

http://en.wikipedia.org/wiki/Posting_style#Bottom-posting

Thanks

2013-04-26 18:48:12

by David Miller

[permalink] [raw]
Subject: Re: [RFC PATCH net-next 2/6] x86: bpf_jit_comp: support BPF_S_ANC_SECCOMP_LD_W instruction

From: Eric Dumazet <[email protected]>
Date: Fri, 26 Apr 2013 08:43:41 -0700

> You send patches and ask us to spend hours on it, this is not how it
> works.

+1

2013-04-29 12:18:16

by Nicolas Schichan

[permalink] [raw]
Subject: Re: [RFC PATCH net-next 0/6] seccomp filter JIT

On 04/26/2013 02:31 PM, Xi Wang wrote:
> Thanks for the pointer.
>
> For the ARM part, looks like Nicolas's patch requires to implement two
> wrappers for each arch:
>
> void seccomp_jit_compile(struct seccomp_filter *fp);
> void seccomp_jit_free(struct seccomp_filter *fp);
>
> The implementation of these wrappers is almost identical to:
>
> void bpf_jit_compile(struct sk_filter *fp);
> void bpf_jit_free(struct sk_filter *fp);
>
> While this patch uses a unified interface for both packet & seccomp filters.
>
> bpf_func_t bpf_jit_compile(struct sock_filter *filter, unsigned int flen);
> void bpf_jit_free(bpf_func_t bpf_func);
>
> Shouldn't be hard to merge though.

Hi,

I went for the solution I submitted because I wanted to avoid changes to the
current bpf_jit_compile prototypes for all currently supported architectures
(for most of which, I can only compile-test).

My solution also allows the seccomp jit code to be disabled while still
allowing jit on socket filters (via a Kconfig option). This might be useful to
some people.

Regards,

--
Nicolas Schichan
Freebox SAS

2013-04-29 13:21:41

by Nicolas Schichan

[permalink] [raw]
Subject: Re: [RFC PATCH net-next 0/6] seccomp filter JIT

On 04/29/2013 02:18 PM, Nicolas Schichan wrote:
> My solution also allows the seccomp jit code to be disabled while still
> allowing jit on socket filters (via a Kconfig option). This might be useful to
> some people.

Please disregard the above, I had missed the fact that the
BPF_S_ANC_SECCOMP_LD_W case was embedded in an #ifdef CONFIG_SECCOMP_FILTER.

Regards,

--
Nicolas Schichan
Freebox SAS