From: Alexei Starovoitov
To: "David S. Miller"
Cc: Ingo Molnar, Steven Rostedt, Daniel Borkmann, Chema Gonzalez, Eric Dumazet, Peter Zijlstra, Arnaldo Carvalho de Melo, Jiri Olsa, Thomas Gleixner, "H. Peter Anvin", Andrew Morton, Kees Cook, netdev@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH v2 net-next 1/2] net: filter: split filter.c into two files
Date: Mon, 2 Jun 2014 00:01:45 -0700
Message-Id: <1401692506-7796-2-git-send-email-ast@plumgrid.com>
X-Mailer: git-send-email 1.7.9.5
In-Reply-To: <1401692506-7796-1-git-send-email-ast@plumgrid.com>
References: <1401692506-7796-1-git-send-email-ast@plumgrid.com>
Sender: linux-kernel-owner@vger.kernel.org
List-ID:
X-Mailing-List: linux-kernel@vger.kernel.org

BPF is used in several kernel components. This split creates a logical boundary between the generic BPF core and the specific BPF use cases:

kernel/bpf/core.c: internal BPF interpreter, classic-to-internal converter, classic verifier
net/core/filter.c: classic BPF extensions related to socket filters, socket attach/detach

This patch only moves functions. No other changes.

The next patch introduces a hidden Kconfig flag, so that seccomp and tracing filters can select the BPF core alone instead of depending on the whole of NET.

Signed-off-by: Alexei Starovoitov
---
 include/linux/filter.h | 2 +
 kernel/Makefile | 1 +
 kernel/bpf/Makefile | 5 +
 kernel/bpf/core.c | 1042 ++++++++++++++++++++++++++++++++++++++++++++++++
 net/core/filter.c | 1023 +----------------------------------------------
 5 files changed, 1052 insertions(+), 1021 deletions(-)
 create mode 100644 kernel/bpf/Makefile
 create mode 100644 kernel/bpf/core.c

diff --git a/include/linux/filter.h b/include/linux/filter.h index f0c2ad43b4af..0e463ee77bb2 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -341,6 +341,8 @@ void sk_filter_free(struct sk_filter *fp); int sk_convert_filter(struct sock_filter *prog, int len, struct sock_filter_int *new_prog, int *new_len); +bool sk_convert_bpf_extensions(struct sock_filter *fp, + struct sock_filter_int **insnp); int sk_unattached_filter_create(struct sk_filter **pfp, struct sock_fprog_kern *fprog); diff --git a/kernel/Makefile b/kernel/Makefile index f2a8b6246ce9..e7360b7c2c0e 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -87,6 +87,7 @@ obj-$(CONFIG_RING_BUFFER) += trace/ obj-$(CONFIG_TRACEPOINTS) += trace/ obj-$(CONFIG_IRQ_WORK) += irq_work.o obj-$(CONFIG_CPU_PM) += cpu_pm.o +obj-$(CONFIG_NET) += bpf/ obj-$(CONFIG_PERF_EVENTS) += events/ diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile new file mode 100644 index 000000000000..2634b2fe5202 --- /dev/null +++ b/kernel/bpf/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for the BPF core infrastructure +# + +obj-y := core.o diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c new file mode 100644 index 000000000000..22c2d99414c0 --- /dev/null +++ b/kernel/bpf/core.c @@ -0,0 +1,1042 @@ +/* + * Linux Socket Filter - Kernel level socket filtering + * + * Based on the design of the Berkeley Packet Filter. 
The new + * internal format has been designed by PLUMgrid: + * + * Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com + * + * Authors: + * + * Jay Schulist + * Alexei Starovoitov + * Daniel Borkmann + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Andi Kleen - Fix a few bad bugs and races. + * Kris Katterjohn - Added many additional checks in sk_chk_filter() + */ + +#include +#include +#include + +/* Registers */ +#define BPF_R0 regs[BPF_REG_0] +#define BPF_R1 regs[BPF_REG_1] +#define BPF_R2 regs[BPF_REG_2] +#define BPF_R3 regs[BPF_REG_3] +#define BPF_R4 regs[BPF_REG_4] +#define BPF_R5 regs[BPF_REG_5] +#define BPF_R6 regs[BPF_REG_6] +#define BPF_R7 regs[BPF_REG_7] +#define BPF_R8 regs[BPF_REG_8] +#define BPF_R9 regs[BPF_REG_9] +#define BPF_R10 regs[BPF_REG_10] + +/* Named registers */ +#define A regs[insn->a_reg] +#define X regs[insn->x_reg] +#define FP regs[BPF_REG_FP] +#define ARG1 regs[BPF_REG_ARG1] +#define CTX regs[BPF_REG_CTX] +#define K insn->imm + +/* Exported for the bpf jit load helper */ +void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size) +{ + u8 *ptr = NULL; + + if (k >= SKF_NET_OFF) + ptr = skb_network_header(skb) + k - SKF_NET_OFF; + else if (k >= SKF_LL_OFF) + ptr = skb_mac_header(skb) + k - SKF_LL_OFF; + if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb)) + return ptr; + + return NULL; +} + +static inline void *load_pointer(const struct sk_buff *skb, int k, + unsigned int size, void *buffer) +{ + if (k >= 0) + return skb_header_pointer(skb, k, size, buffer); + + return bpf_internal_load_pointer_neg_helper(skb, k, size); +} + +/* Base function for offset calculation. Needs to go into .text section, + * therefore keeping it non-static as well; will also be used by JITs + * anyway later on, so do not let the compiler omit it. + */ +noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + return 0; +} + +/** + * __sk_run_filter - run a filter on a given context + * @ctx: buffer to run the filter on + * @insn: filter to apply + * + * Decode and apply filter instructions to the skb->data. Return length to + * keep, 0 for none. @ctx is the data we are operating on, @insn is the + * array of filter instructions. + */ +static unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn) +{ + u64 stack[MAX_BPF_STACK / sizeof(u64)]; + u64 regs[MAX_BPF_REG], tmp; + static const void *jumptable[256] = { + [0 ... 255] = &&default_label, + /* Now overwrite non-defaults ... 
*/ + /* 32 bit ALU operations */ + [BPF_ALU | BPF_ADD | BPF_X] = &&ALU_ADD_X, + [BPF_ALU | BPF_ADD | BPF_K] = &&ALU_ADD_K, + [BPF_ALU | BPF_SUB | BPF_X] = &&ALU_SUB_X, + [BPF_ALU | BPF_SUB | BPF_K] = &&ALU_SUB_K, + [BPF_ALU | BPF_AND | BPF_X] = &&ALU_AND_X, + [BPF_ALU | BPF_AND | BPF_K] = &&ALU_AND_K, + [BPF_ALU | BPF_OR | BPF_X] = &&ALU_OR_X, + [BPF_ALU | BPF_OR | BPF_K] = &&ALU_OR_K, + [BPF_ALU | BPF_LSH | BPF_X] = &&ALU_LSH_X, + [BPF_ALU | BPF_LSH | BPF_K] = &&ALU_LSH_K, + [BPF_ALU | BPF_RSH | BPF_X] = &&ALU_RSH_X, + [BPF_ALU | BPF_RSH | BPF_K] = &&ALU_RSH_K, + [BPF_ALU | BPF_XOR | BPF_X] = &&ALU_XOR_X, + [BPF_ALU | BPF_XOR | BPF_K] = &&ALU_XOR_K, + [BPF_ALU | BPF_MUL | BPF_X] = &&ALU_MUL_X, + [BPF_ALU | BPF_MUL | BPF_K] = &&ALU_MUL_K, + [BPF_ALU | BPF_MOV | BPF_X] = &&ALU_MOV_X, + [BPF_ALU | BPF_MOV | BPF_K] = &&ALU_MOV_K, + [BPF_ALU | BPF_DIV | BPF_X] = &&ALU_DIV_X, + [BPF_ALU | BPF_DIV | BPF_K] = &&ALU_DIV_K, + [BPF_ALU | BPF_MOD | BPF_X] = &&ALU_MOD_X, + [BPF_ALU | BPF_MOD | BPF_K] = &&ALU_MOD_K, + [BPF_ALU | BPF_NEG] = &&ALU_NEG, + [BPF_ALU | BPF_END | BPF_TO_BE] = &&ALU_END_TO_BE, + [BPF_ALU | BPF_END | BPF_TO_LE] = &&ALU_END_TO_LE, + /* 64 bit ALU operations */ + [BPF_ALU64 | BPF_ADD | BPF_X] = &&ALU64_ADD_X, + [BPF_ALU64 | BPF_ADD | BPF_K] = &&ALU64_ADD_K, + [BPF_ALU64 | BPF_SUB | BPF_X] = &&ALU64_SUB_X, + [BPF_ALU64 | BPF_SUB | BPF_K] = &&ALU64_SUB_K, + [BPF_ALU64 | BPF_AND | BPF_X] = &&ALU64_AND_X, + [BPF_ALU64 | BPF_AND | BPF_K] = &&ALU64_AND_K, + [BPF_ALU64 | BPF_OR | BPF_X] = &&ALU64_OR_X, + [BPF_ALU64 | BPF_OR | BPF_K] = &&ALU64_OR_K, + [BPF_ALU64 | BPF_LSH | BPF_X] = &&ALU64_LSH_X, + [BPF_ALU64 | BPF_LSH | BPF_K] = &&ALU64_LSH_K, + [BPF_ALU64 | BPF_RSH | BPF_X] = &&ALU64_RSH_X, + [BPF_ALU64 | BPF_RSH | BPF_K] = &&ALU64_RSH_K, + [BPF_ALU64 | BPF_XOR | BPF_X] = &&ALU64_XOR_X, + [BPF_ALU64 | BPF_XOR | BPF_K] = &&ALU64_XOR_K, + [BPF_ALU64 | BPF_MUL | BPF_X] = &&ALU64_MUL_X, + [BPF_ALU64 | BPF_MUL | BPF_K] = &&ALU64_MUL_K, + [BPF_ALU64 | BPF_MOV | BPF_X] = &&ALU64_MOV_X, + [BPF_ALU64 | BPF_MOV | BPF_K] = &&ALU64_MOV_K, + [BPF_ALU64 | BPF_ARSH | BPF_X] = &&ALU64_ARSH_X, + [BPF_ALU64 | BPF_ARSH | BPF_K] = &&ALU64_ARSH_K, + [BPF_ALU64 | BPF_DIV | BPF_X] = &&ALU64_DIV_X, + [BPF_ALU64 | BPF_DIV | BPF_K] = &&ALU64_DIV_K, + [BPF_ALU64 | BPF_MOD | BPF_X] = &&ALU64_MOD_X, + [BPF_ALU64 | BPF_MOD | BPF_K] = &&ALU64_MOD_K, + [BPF_ALU64 | BPF_NEG] = &&ALU64_NEG, + /* Call instruction */ + [BPF_JMP | BPF_CALL] = &&JMP_CALL, + /* Jumps */ + [BPF_JMP | BPF_JA] = &&JMP_JA, + [BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X, + [BPF_JMP | BPF_JEQ | BPF_K] = &&JMP_JEQ_K, + [BPF_JMP | BPF_JNE | BPF_X] = &&JMP_JNE_X, + [BPF_JMP | BPF_JNE | BPF_K] = &&JMP_JNE_K, + [BPF_JMP | BPF_JGT | BPF_X] = &&JMP_JGT_X, + [BPF_JMP | BPF_JGT | BPF_K] = &&JMP_JGT_K, + [BPF_JMP | BPF_JGE | BPF_X] = &&JMP_JGE_X, + [BPF_JMP | BPF_JGE | BPF_K] = &&JMP_JGE_K, + [BPF_JMP | BPF_JSGT | BPF_X] = &&JMP_JSGT_X, + [BPF_JMP | BPF_JSGT | BPF_K] = &&JMP_JSGT_K, + [BPF_JMP | BPF_JSGE | BPF_X] = &&JMP_JSGE_X, + [BPF_JMP | BPF_JSGE | BPF_K] = &&JMP_JSGE_K, + [BPF_JMP | BPF_JSET | BPF_X] = &&JMP_JSET_X, + [BPF_JMP | BPF_JSET | BPF_K] = &&JMP_JSET_K, + /* Program return */ + [BPF_JMP | BPF_EXIT] = &&JMP_EXIT, + /* Store instructions */ + [BPF_STX | BPF_MEM | BPF_B] = &&STX_MEM_B, + [BPF_STX | BPF_MEM | BPF_H] = &&STX_MEM_H, + [BPF_STX | BPF_MEM | BPF_W] = &&STX_MEM_W, + [BPF_STX | BPF_MEM | BPF_DW] = &&STX_MEM_DW, + [BPF_STX | BPF_XADD | BPF_W] = &&STX_XADD_W, + [BPF_STX | BPF_XADD | BPF_DW] = &&STX_XADD_DW, + [BPF_ST | BPF_MEM | BPF_B] = 
&&ST_MEM_B, + [BPF_ST | BPF_MEM | BPF_H] = &&ST_MEM_H, + [BPF_ST | BPF_MEM | BPF_W] = &&ST_MEM_W, + [BPF_ST | BPF_MEM | BPF_DW] = &&ST_MEM_DW, + /* Load instructions */ + [BPF_LDX | BPF_MEM | BPF_B] = &&LDX_MEM_B, + [BPF_LDX | BPF_MEM | BPF_H] = &&LDX_MEM_H, + [BPF_LDX | BPF_MEM | BPF_W] = &&LDX_MEM_W, + [BPF_LDX | BPF_MEM | BPF_DW] = &&LDX_MEM_DW, + [BPF_LD | BPF_ABS | BPF_W] = &&LD_ABS_W, + [BPF_LD | BPF_ABS | BPF_H] = &&LD_ABS_H, + [BPF_LD | BPF_ABS | BPF_B] = &&LD_ABS_B, + [BPF_LD | BPF_IND | BPF_W] = &&LD_IND_W, + [BPF_LD | BPF_IND | BPF_H] = &&LD_IND_H, + [BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B, + }; + void *ptr; + int off; + +#define CONT ({ insn++; goto select_insn; }) +#define CONT_JMP ({ insn++; goto select_insn; }) + + FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; + ARG1 = (u64) (unsigned long) ctx; + + /* Register for user BPF programs need to be reset first. */ + regs[BPF_REG_A] = 0; + regs[BPF_REG_X] = 0; + +select_insn: + goto *jumptable[insn->code]; + + /* ALU */ +#define ALU(OPCODE, OP) \ + ALU64_##OPCODE##_X: \ + A = A OP X; \ + CONT; \ + ALU_##OPCODE##_X: \ + A = (u32) A OP (u32) X; \ + CONT; \ + ALU64_##OPCODE##_K: \ + A = A OP K; \ + CONT; \ + ALU_##OPCODE##_K: \ + A = (u32) A OP (u32) K; \ + CONT; + + ALU(ADD, +) + ALU(SUB, -) + ALU(AND, &) + ALU(OR, |) + ALU(LSH, <<) + ALU(RSH, >>) + ALU(XOR, ^) + ALU(MUL, *) +#undef ALU + ALU_NEG: + A = (u32) -A; + CONT; + ALU64_NEG: + A = -A; + CONT; + ALU_MOV_X: + A = (u32) X; + CONT; + ALU_MOV_K: + A = (u32) K; + CONT; + ALU64_MOV_X: + A = X; + CONT; + ALU64_MOV_K: + A = K; + CONT; + ALU64_ARSH_X: + (*(s64 *) &A) >>= X; + CONT; + ALU64_ARSH_K: + (*(s64 *) &A) >>= K; + CONT; + ALU64_MOD_X: + if (unlikely(X == 0)) + return 0; + tmp = A; + A = do_div(tmp, X); + CONT; + ALU_MOD_X: + if (unlikely(X == 0)) + return 0; + tmp = (u32) A; + A = do_div(tmp, (u32) X); + CONT; + ALU64_MOD_K: + tmp = A; + A = do_div(tmp, K); + CONT; + ALU_MOD_K: + tmp = (u32) A; + A = do_div(tmp, (u32) K); + CONT; + ALU64_DIV_X: + if (unlikely(X == 0)) + return 0; + do_div(A, X); + CONT; + ALU_DIV_X: + if (unlikely(X == 0)) + return 0; + tmp = (u32) A; + do_div(tmp, (u32) X); + A = (u32) tmp; + CONT; + ALU64_DIV_K: + do_div(A, K); + CONT; + ALU_DIV_K: + tmp = (u32) A; + do_div(tmp, (u32) K); + A = (u32) tmp; + CONT; + ALU_END_TO_BE: + switch (K) { + case 16: + A = (__force u16) cpu_to_be16(A); + break; + case 32: + A = (__force u32) cpu_to_be32(A); + break; + case 64: + A = (__force u64) cpu_to_be64(A); + break; + } + CONT; + ALU_END_TO_LE: + switch (K) { + case 16: + A = (__force u16) cpu_to_le16(A); + break; + case 32: + A = (__force u32) cpu_to_le32(A); + break; + case 64: + A = (__force u64) cpu_to_le64(A); + break; + } + CONT; + + /* CALL */ + JMP_CALL: + /* Function call scratches BPF_R1-BPF_R5 registers, + * preserves BPF_R6-BPF_R9, and stores return value + * into BPF_R0. 
+ */ + BPF_R0 = (__bpf_call_base + insn->imm)(BPF_R1, BPF_R2, BPF_R3, + BPF_R4, BPF_R5); + CONT; + + /* JMP */ + JMP_JA: + insn += insn->off; + CONT; + JMP_JEQ_X: + if (A == X) { + insn += insn->off; + CONT_JMP; + } + CONT; + JMP_JEQ_K: + if (A == K) { + insn += insn->off; + CONT_JMP; + } + CONT; + JMP_JNE_X: + if (A != X) { + insn += insn->off; + CONT_JMP; + } + CONT; + JMP_JNE_K: + if (A != K) { + insn += insn->off; + CONT_JMP; + } + CONT; + JMP_JGT_X: + if (A > X) { + insn += insn->off; + CONT_JMP; + } + CONT; + JMP_JGT_K: + if (A > K) { + insn += insn->off; + CONT_JMP; + } + CONT; + JMP_JGE_X: + if (A >= X) { + insn += insn->off; + CONT_JMP; + } + CONT; + JMP_JGE_K: + if (A >= K) { + insn += insn->off; + CONT_JMP; + } + CONT; + JMP_JSGT_X: + if (((s64) A) > ((s64) X)) { + insn += insn->off; + CONT_JMP; + } + CONT; + JMP_JSGT_K: + if (((s64) A) > ((s64) K)) { + insn += insn->off; + CONT_JMP; + } + CONT; + JMP_JSGE_X: + if (((s64) A) >= ((s64) X)) { + insn += insn->off; + CONT_JMP; + } + CONT; + JMP_JSGE_K: + if (((s64) A) >= ((s64) K)) { + insn += insn->off; + CONT_JMP; + } + CONT; + JMP_JSET_X: + if (A & X) { + insn += insn->off; + CONT_JMP; + } + CONT; + JMP_JSET_K: + if (A & K) { + insn += insn->off; + CONT_JMP; + } + CONT; + JMP_EXIT: + return BPF_R0; + + /* STX and ST and LDX*/ +#define LDST(SIZEOP, SIZE) \ + STX_MEM_##SIZEOP: \ + *(SIZE *)(unsigned long) (A + insn->off) = X; \ + CONT; \ + ST_MEM_##SIZEOP: \ + *(SIZE *)(unsigned long) (A + insn->off) = K; \ + CONT; \ + LDX_MEM_##SIZEOP: \ + A = *(SIZE *)(unsigned long) (X + insn->off); \ + CONT; + + LDST(B, u8) + LDST(H, u16) + LDST(W, u32) + LDST(DW, u64) +#undef LDST + STX_XADD_W: /* lock xadd *(u32 *)(A + insn->off) += X */ + atomic_add((u32) X, (atomic_t *)(unsigned long) + (A + insn->off)); + CONT; + STX_XADD_DW: /* lock xadd *(u64 *)(A + insn->off) += X */ + atomic64_add((u64) X, (atomic64_t *)(unsigned long) + (A + insn->off)); + CONT; + LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + K)) */ + off = K; +load_word: + /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are + * only appearing in the programs where ctx == + * skb. All programs keep 'ctx' in regs[BPF_REG_CTX] + * == BPF_R6, sk_convert_filter() saves it in BPF_R6, + * internal BPF verifier will check that BPF_R6 == + * ctx. + * + * BPF_ABS and BPF_IND are wrappers of function calls, + * so they scratch BPF_R1-BPF_R5 registers, preserve + * BPF_R6-BPF_R9, and store return value into BPF_R0. 
+ * + * Implicit input: + * ctx + * + * Explicit input: + * X == any register + * K == 32-bit immediate + * + * Output: + * BPF_R0 - 8/16/32-bit skb data converted to cpu endianness + */ + + ptr = load_pointer((struct sk_buff *) ctx, off, 4, &tmp); + if (likely(ptr != NULL)) { + BPF_R0 = get_unaligned_be32(ptr); + CONT; + } + + return 0; + LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + K)) */ + off = K; +load_half: + ptr = load_pointer((struct sk_buff *) ctx, off, 2, &tmp); + if (likely(ptr != NULL)) { + BPF_R0 = get_unaligned_be16(ptr); + CONT; + } + + return 0; + LD_ABS_B: /* BPF_R0 = *(u8 *) (ctx + K) */ + off = K; +load_byte: + ptr = load_pointer((struct sk_buff *) ctx, off, 1, &tmp); + if (likely(ptr != NULL)) { + BPF_R0 = *(u8 *)ptr; + CONT; + } + + return 0; + LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + X + K)) */ + off = K + X; + goto load_word; + LD_IND_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + X + K)) */ + off = K + X; + goto load_half; + LD_IND_B: /* BPF_R0 = *(u8 *) (skb->data + X + K) */ + off = K + X; + goto load_byte; + + default_label: + /* If we ever reach this, we have a bug somewhere. */ + WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code); + return 0; +} + +/** + * sk_convert_filter - convert filter program + * @prog: the user passed filter program + * @len: the length of the user passed filter program + * @new_prog: buffer where converted program will be stored + * @new_len: pointer to store length of converted program + * + * Remap 'sock_filter' style BPF instruction set to 'sock_filter_ext' style. + * Conversion workflow: + * + * 1) First pass for calculating the new program length: + * sk_convert_filter(old_prog, old_len, NULL, &new_len) + * + * 2) 2nd pass to remap in two passes: 1st pass finds new + * jump offsets, 2nd pass remapping: + * new_prog = kmalloc(sizeof(struct sock_filter_int) * new_len); + * sk_convert_filter(old_prog, old_len, new_prog, &new_len); + * + * User BPF's register A is mapped to our BPF register 6, user BPF + * register X is mapped to BPF register 7; frame pointer is always + * register 10; Context 'void *ctx' is stored in register 1, that is, + * for socket filters: ctx == 'struct sk_buff *', for seccomp: + * ctx == 'struct seccomp_data *'. + */ +int sk_convert_filter(struct sock_filter *prog, int len, + struct sock_filter_int *new_prog, int *new_len) +{ + int new_flen = 0, pass = 0, target, i; + struct sock_filter_int *new_insn; + struct sock_filter *fp; + int *addrs = NULL; + u8 bpf_src; + + BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK); + BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG); + + if (len <= 0 || len >= BPF_MAXINSNS) + return -EINVAL; + + if (new_prog) { + addrs = kzalloc(len * sizeof(*addrs), GFP_KERNEL); + if (!addrs) + return -ENOMEM; + } + +do_pass: + new_insn = new_prog; + fp = prog; + + if (new_insn) + *new_insn = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1); + new_insn++; + + for (i = 0; i < len; fp++, i++) { + struct sock_filter_int tmp_insns[6] = { }; + struct sock_filter_int *insn = tmp_insns; + + if (addrs) + addrs[i] = new_insn - new_prog; + + switch (fp->code) { + /* All arithmetic insns and skb loads map as-is. 
*/ + case BPF_ALU | BPF_ADD | BPF_X: + case BPF_ALU | BPF_ADD | BPF_K: + case BPF_ALU | BPF_SUB | BPF_X: + case BPF_ALU | BPF_SUB | BPF_K: + case BPF_ALU | BPF_AND | BPF_X: + case BPF_ALU | BPF_AND | BPF_K: + case BPF_ALU | BPF_OR | BPF_X: + case BPF_ALU | BPF_OR | BPF_K: + case BPF_ALU | BPF_LSH | BPF_X: + case BPF_ALU | BPF_LSH | BPF_K: + case BPF_ALU | BPF_RSH | BPF_X: + case BPF_ALU | BPF_RSH | BPF_K: + case BPF_ALU | BPF_XOR | BPF_X: + case BPF_ALU | BPF_XOR | BPF_K: + case BPF_ALU | BPF_MUL | BPF_X: + case BPF_ALU | BPF_MUL | BPF_K: + case BPF_ALU | BPF_DIV | BPF_X: + case BPF_ALU | BPF_DIV | BPF_K: + case BPF_ALU | BPF_MOD | BPF_X: + case BPF_ALU | BPF_MOD | BPF_K: + case BPF_ALU | BPF_NEG: + case BPF_LD | BPF_ABS | BPF_W: + case BPF_LD | BPF_ABS | BPF_H: + case BPF_LD | BPF_ABS | BPF_B: + case BPF_LD | BPF_IND | BPF_W: + case BPF_LD | BPF_IND | BPF_H: + case BPF_LD | BPF_IND | BPF_B: + /* Check for overloaded BPF extension and + * directly convert it if found, otherwise + * just move on with mapping. + */ + if (BPF_CLASS(fp->code) == BPF_LD && + BPF_MODE(fp->code) == BPF_ABS && + sk_convert_bpf_extensions(fp, &insn)) + break; + + *insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k); + break; + + /* Jump transformation cannot use BPF block macros + * everywhere as offset calculation and target updates + * require a bit more work than the rest, i.e. jump + * opcodes map as-is, but offsets need adjustment. + */ + +#define BPF_EMIT_JMP \ + do { \ + if (target >= len || target < 0) \ + goto err; \ + insn->off = addrs ? addrs[target] - addrs[i] - 1 : 0; \ + /* Adjust pc relative offset for 2nd or 3rd insn. */ \ + insn->off -= insn - tmp_insns; \ + } while (0) + + case BPF_JMP | BPF_JA: + target = i + fp->k + 1; + insn->code = fp->code; + BPF_EMIT_JMP; + break; + + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP | BPF_JSET | BPF_X: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JGE | BPF_X: + if (BPF_SRC(fp->code) == BPF_K && (int) fp->k < 0) { + /* BPF immediates are signed, zero extend + * immediate into tmp register and use it + * in compare insn. + */ + *insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k); + + insn->a_reg = BPF_REG_A; + insn->x_reg = BPF_REG_TMP; + bpf_src = BPF_X; + } else { + insn->a_reg = BPF_REG_A; + insn->x_reg = BPF_REG_X; + insn->imm = fp->k; + bpf_src = BPF_SRC(fp->code); + } + + /* Common case where 'jump_false' is next insn. */ + if (fp->jf == 0) { + insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src; + target = i + fp->jt + 1; + BPF_EMIT_JMP; + break; + } + + /* Convert JEQ into JNE when 'jump_true' is next insn. */ + if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) { + insn->code = BPF_JMP | BPF_JNE | bpf_src; + target = i + fp->jf + 1; + BPF_EMIT_JMP; + break; + } + + /* Other jumps are mapped into two insns: Jxx and JA. */ + target = i + fp->jt + 1; + insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src; + BPF_EMIT_JMP; + insn++; + + insn->code = BPF_JMP | BPF_JA; + target = i + fp->jf + 1; + BPF_EMIT_JMP; + break; + + /* ldxb 4 * ([14] & 0xf) is remaped into 6 insns. 
*/ + case BPF_LDX | BPF_MSH | BPF_B: + /* tmp = A */ + *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_A); + /* A = BPF_R0 = *(u8 *) (skb->data + K) */ + *insn++ = BPF_LD_ABS(BPF_B, fp->k); + /* A &= 0xf */ + *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf); + /* A <<= 2 */ + *insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2); + /* X = A */ + *insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A); + /* A = tmp */ + *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP); + break; + + /* RET_K, RET_A are remaped into 2 insns. */ + case BPF_RET | BPF_A: + case BPF_RET | BPF_K: + *insn++ = BPF_MOV32_RAW(BPF_RVAL(fp->code) == BPF_K ? + BPF_K : BPF_X, BPF_REG_0, + BPF_REG_A, fp->k); + *insn = BPF_EXIT_INSN(); + break; + + /* Store to stack. */ + case BPF_ST: + case BPF_STX: + *insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) == + BPF_ST ? BPF_REG_A : BPF_REG_X, + -(BPF_MEMWORDS - fp->k) * 4); + break; + + /* Load from stack. */ + case BPF_LD | BPF_MEM: + case BPF_LDX | BPF_MEM: + *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ? + BPF_REG_A : BPF_REG_X, BPF_REG_FP, + -(BPF_MEMWORDS - fp->k) * 4); + break; + + /* A = K or X = K */ + case BPF_LD | BPF_IMM: + case BPF_LDX | BPF_IMM: + *insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ? + BPF_REG_A : BPF_REG_X, fp->k); + break; + + /* X = A */ + case BPF_MISC | BPF_TAX: + *insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A); + break; + + /* A = X */ + case BPF_MISC | BPF_TXA: + *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X); + break; + + /* A = skb->len or X = skb->len */ + case BPF_LD | BPF_W | BPF_LEN: + case BPF_LDX | BPF_W | BPF_LEN: + *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ? + BPF_REG_A : BPF_REG_X, BPF_REG_CTX, + offsetof(struct sk_buff, len)); + break; + + /* Access seccomp_data fields. */ + case BPF_LDX | BPF_ABS | BPF_W: + /* A = *(u32 *) (ctx + K) */ + *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k); + break; + + /* Unkown instruction. */ + default: + goto err; + } + + insn++; + if (new_prog) + memcpy(new_insn, tmp_insns, + sizeof(*insn) * (insn - tmp_insns)); + new_insn += insn - tmp_insns; + } + + if (!new_prog) { + /* Only calculating new length. */ + *new_len = new_insn - new_prog; + return 0; + } + + pass++; + if (new_flen != new_insn - new_prog) { + new_flen = new_insn - new_prog; + if (pass > 2) + goto err; + goto do_pass; + } + + kfree(addrs); + BUG_ON(*new_len != new_flen); + return 0; +err: + kfree(addrs); + return -EINVAL; +} + +/* Security: + * + * A BPF program is able to use 16 cells of memory to store intermediate + * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter()). + * + * As we dont want to clear mem[] array for each packet going through + * sk_run_filter(), we check that filter loaded by user never try to read + * a cell if not previously written, and we check all branches to be sure + * a malicious user doesn't try to abuse us. 
+ */ +static int check_load_and_stores(struct sock_filter *filter, int flen) +{ + u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */ + int pc, ret = 0; + + BUILD_BUG_ON(BPF_MEMWORDS > 16); + + masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL); + if (!masks) + return -ENOMEM; + + memset(masks, 0xff, flen * sizeof(*masks)); + + for (pc = 0; pc < flen; pc++) { + memvalid &= masks[pc]; + + switch (filter[pc].code) { + case BPF_ST: + case BPF_STX: + memvalid |= (1 << filter[pc].k); + break; + case BPF_LD | BPF_MEM: + case BPF_LDX | BPF_MEM: + if (!(memvalid & (1 << filter[pc].k))) { + ret = -EINVAL; + goto error; + } + break; + case BPF_JMP | BPF_JA: + /* A jump must set masks on target */ + masks[pc + 1 + filter[pc].k] &= memvalid; + memvalid = ~0; + break; + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP | BPF_JSET | BPF_X: + /* A jump must set masks on targets */ + masks[pc + 1 + filter[pc].jt] &= memvalid; + masks[pc + 1 + filter[pc].jf] &= memvalid; + memvalid = ~0; + break; + } + } +error: + kfree(masks); + return ret; +} + +static bool chk_code_allowed(u16 code_to_probe) +{ + static const bool codes[] = { + /* 32 bit ALU operations */ + [BPF_ALU | BPF_ADD | BPF_K] = true, + [BPF_ALU | BPF_ADD | BPF_X] = true, + [BPF_ALU | BPF_SUB | BPF_K] = true, + [BPF_ALU | BPF_SUB | BPF_X] = true, + [BPF_ALU | BPF_MUL | BPF_K] = true, + [BPF_ALU | BPF_MUL | BPF_X] = true, + [BPF_ALU | BPF_DIV | BPF_K] = true, + [BPF_ALU | BPF_DIV | BPF_X] = true, + [BPF_ALU | BPF_MOD | BPF_K] = true, + [BPF_ALU | BPF_MOD | BPF_X] = true, + [BPF_ALU | BPF_AND | BPF_K] = true, + [BPF_ALU | BPF_AND | BPF_X] = true, + [BPF_ALU | BPF_OR | BPF_K] = true, + [BPF_ALU | BPF_OR | BPF_X] = true, + [BPF_ALU | BPF_XOR | BPF_K] = true, + [BPF_ALU | BPF_XOR | BPF_X] = true, + [BPF_ALU | BPF_LSH | BPF_K] = true, + [BPF_ALU | BPF_LSH | BPF_X] = true, + [BPF_ALU | BPF_RSH | BPF_K] = true, + [BPF_ALU | BPF_RSH | BPF_X] = true, + [BPF_ALU | BPF_NEG] = true, + /* Load instructions */ + [BPF_LD | BPF_W | BPF_ABS] = true, + [BPF_LD | BPF_H | BPF_ABS] = true, + [BPF_LD | BPF_B | BPF_ABS] = true, + [BPF_LD | BPF_W | BPF_LEN] = true, + [BPF_LD | BPF_W | BPF_IND] = true, + [BPF_LD | BPF_H | BPF_IND] = true, + [BPF_LD | BPF_B | BPF_IND] = true, + [BPF_LD | BPF_IMM] = true, + [BPF_LD | BPF_MEM] = true, + [BPF_LDX | BPF_W | BPF_LEN] = true, + [BPF_LDX | BPF_B | BPF_MSH] = true, + [BPF_LDX | BPF_IMM] = true, + [BPF_LDX | BPF_MEM] = true, + /* Store instructions */ + [BPF_ST] = true, + [BPF_STX] = true, + /* Misc instructions */ + [BPF_MISC | BPF_TAX] = true, + [BPF_MISC | BPF_TXA] = true, + /* Return instructions */ + [BPF_RET | BPF_K] = true, + [BPF_RET | BPF_A] = true, + /* Jump instructions */ + [BPF_JMP | BPF_JA] = true, + [BPF_JMP | BPF_JEQ | BPF_K] = true, + [BPF_JMP | BPF_JEQ | BPF_X] = true, + [BPF_JMP | BPF_JGE | BPF_K] = true, + [BPF_JMP | BPF_JGE | BPF_X] = true, + [BPF_JMP | BPF_JGT | BPF_K] = true, + [BPF_JMP | BPF_JGT | BPF_X] = true, + [BPF_JMP | BPF_JSET | BPF_K] = true, + [BPF_JMP | BPF_JSET | BPF_X] = true, + }; + + if (code_to_probe >= ARRAY_SIZE(codes)) + return false; + + return codes[code_to_probe]; +} + +/** + * sk_chk_filter - verify socket filter code + * @filter: filter to verify + * @flen: length of filter + * + * Check the user's filter code. If we let some ugly + * filter code slip through kaboom! 
The filter must contain + * no references or jumps that are out of range, no illegal + * instructions, and must end with a RET instruction. + * + * All jumps are forward as they are not signed. + * + * Returns 0 if the rule set is legal or -EINVAL if not. + */ +int sk_chk_filter(struct sock_filter *filter, unsigned int flen) +{ + bool anc_found; + int pc; + + if (flen == 0 || flen > BPF_MAXINSNS) + return -EINVAL; + + /* Check the filter code now */ + for (pc = 0; pc < flen; pc++) { + struct sock_filter *ftest = &filter[pc]; + + /* May we actually operate on this code? */ + if (!chk_code_allowed(ftest->code)) + return -EINVAL; + + /* Some instructions need special checks */ + switch (ftest->code) { + case BPF_ALU | BPF_DIV | BPF_K: + case BPF_ALU | BPF_MOD | BPF_K: + /* Check for division by zero */ + if (ftest->k == 0) + return -EINVAL; + break; + case BPF_LD | BPF_MEM: + case BPF_LDX | BPF_MEM: + case BPF_ST: + case BPF_STX: + /* Check for invalid memory addresses */ + if (ftest->k >= BPF_MEMWORDS) + return -EINVAL; + break; + case BPF_JMP | BPF_JA: + /* Note, the large ftest->k might cause loops. + * Compare this with conditional jumps below, + * where offsets are limited. --ANK (981016) + */ + if (ftest->k >= (unsigned int)(flen - pc - 1)) + return -EINVAL; + break; + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP | BPF_JSET | BPF_X: + /* Both conditionals must be safe */ + if (pc + ftest->jt + 1 >= flen || + pc + ftest->jf + 1 >= flen) + return -EINVAL; + break; + case BPF_LD | BPF_W | BPF_ABS: + case BPF_LD | BPF_H | BPF_ABS: + case BPF_LD | BPF_B | BPF_ABS: + anc_found = false; + if (bpf_anc_helper(ftest) & BPF_ANC) + anc_found = true; + /* Ancillary operation unknown or unsupported */ + if (anc_found == false && ftest->k >= SKF_AD_OFF) + return -EINVAL; + } + } + + /* Last instruction must be a RET code */ + switch (filter[flen - 1].code) { + case BPF_RET | BPF_K: + case BPF_RET | BPF_A: + return check_load_and_stores(filter, flen); + } + + return -EINVAL; +} +EXPORT_SYMBOL(sk_chk_filter); + +void __weak bpf_int_jit_compile(struct sk_filter *prog) +{ +} + +/** + * sk_filter_select_runtime - select execution runtime for BPF program + * @fp: sk_filter populated with internal BPF program + * + * try to JIT internal BPF program, if JIT is not available select interpreter + * BPF program will be executed via SK_RUN_FILTER() macro + */ +void sk_filter_select_runtime(struct sk_filter *fp) +{ + fp->bpf_func = (void *) __sk_run_filter; + + /* Probe if internal BPF can be JITed */ + bpf_int_jit_compile(fp); +} +EXPORT_SYMBOL_GPL(sk_filter_select_runtime); + +/* free internal BPF program */ +void sk_filter_free(struct sk_filter *fp) +{ + bpf_jit_free(fp); +} +EXPORT_SYMBOL_GPL(sk_filter_free); diff --git a/net/core/filter.c b/net/core/filter.c index 842f8393121d..9523677f735b 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -45,54 +45,6 @@ #include #include -/* Registers */ -#define BPF_R0 regs[BPF_REG_0] -#define BPF_R1 regs[BPF_REG_1] -#define BPF_R2 regs[BPF_REG_2] -#define BPF_R3 regs[BPF_REG_3] -#define BPF_R4 regs[BPF_REG_4] -#define BPF_R5 regs[BPF_REG_5] -#define BPF_R6 regs[BPF_REG_6] -#define BPF_R7 regs[BPF_REG_7] -#define BPF_R8 regs[BPF_REG_8] -#define BPF_R9 regs[BPF_REG_9] -#define BPF_R10 regs[BPF_REG_10] - -/* Named registers */ -#define A regs[insn->a_reg] -#define 
X regs[insn->x_reg] -#define FP regs[BPF_REG_FP] -#define ARG1 regs[BPF_REG_ARG1] -#define CTX regs[BPF_REG_CTX] -#define K insn->imm - -/* No hurry in this branch - * - * Exported for the bpf jit load helper. - */ -void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size) -{ - u8 *ptr = NULL; - - if (k >= SKF_NET_OFF) - ptr = skb_network_header(skb) + k - SKF_NET_OFF; - else if (k >= SKF_LL_OFF) - ptr = skb_mac_header(skb) + k - SKF_LL_OFF; - if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb)) - return ptr; - - return NULL; -} - -static inline void *load_pointer(const struct sk_buff *skb, int k, - unsigned int size, void *buffer) -{ - if (k >= 0) - return skb_header_pointer(skb, k, size, buffer); - - return bpf_internal_load_pointer_neg_helper(skb, k, size); -} - /** * sk_filter - run a packet through a socket filter * @sk: sock associated with &sk_buff @@ -135,451 +87,6 @@ int sk_filter(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(sk_filter); -/* Base function for offset calculation. Needs to go into .text section, - * therefore keeping it non-static as well; will also be used by JITs - * anyway later on, so do not let the compiler omit it. - */ -noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) -{ - return 0; -} - -/** - * __sk_run_filter - run a filter on a given context - * @ctx: buffer to run the filter on - * @insn: filter to apply - * - * Decode and apply filter instructions to the skb->data. Return length to - * keep, 0 for none. @ctx is the data we are operating on, @insn is the - * array of filter instructions. - */ -static unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn) -{ - u64 stack[MAX_BPF_STACK / sizeof(u64)]; - u64 regs[MAX_BPF_REG], tmp; - static const void *jumptable[256] = { - [0 ... 255] = &&default_label, - /* Now overwrite non-defaults ... 
*/ - /* 32 bit ALU operations */ - [BPF_ALU | BPF_ADD | BPF_X] = &&ALU_ADD_X, - [BPF_ALU | BPF_ADD | BPF_K] = &&ALU_ADD_K, - [BPF_ALU | BPF_SUB | BPF_X] = &&ALU_SUB_X, - [BPF_ALU | BPF_SUB | BPF_K] = &&ALU_SUB_K, - [BPF_ALU | BPF_AND | BPF_X] = &&ALU_AND_X, - [BPF_ALU | BPF_AND | BPF_K] = &&ALU_AND_K, - [BPF_ALU | BPF_OR | BPF_X] = &&ALU_OR_X, - [BPF_ALU | BPF_OR | BPF_K] = &&ALU_OR_K, - [BPF_ALU | BPF_LSH | BPF_X] = &&ALU_LSH_X, - [BPF_ALU | BPF_LSH | BPF_K] = &&ALU_LSH_K, - [BPF_ALU | BPF_RSH | BPF_X] = &&ALU_RSH_X, - [BPF_ALU | BPF_RSH | BPF_K] = &&ALU_RSH_K, - [BPF_ALU | BPF_XOR | BPF_X] = &&ALU_XOR_X, - [BPF_ALU | BPF_XOR | BPF_K] = &&ALU_XOR_K, - [BPF_ALU | BPF_MUL | BPF_X] = &&ALU_MUL_X, - [BPF_ALU | BPF_MUL | BPF_K] = &&ALU_MUL_K, - [BPF_ALU | BPF_MOV | BPF_X] = &&ALU_MOV_X, - [BPF_ALU | BPF_MOV | BPF_K] = &&ALU_MOV_K, - [BPF_ALU | BPF_DIV | BPF_X] = &&ALU_DIV_X, - [BPF_ALU | BPF_DIV | BPF_K] = &&ALU_DIV_K, - [BPF_ALU | BPF_MOD | BPF_X] = &&ALU_MOD_X, - [BPF_ALU | BPF_MOD | BPF_K] = &&ALU_MOD_K, - [BPF_ALU | BPF_NEG] = &&ALU_NEG, - [BPF_ALU | BPF_END | BPF_TO_BE] = &&ALU_END_TO_BE, - [BPF_ALU | BPF_END | BPF_TO_LE] = &&ALU_END_TO_LE, - /* 64 bit ALU operations */ - [BPF_ALU64 | BPF_ADD | BPF_X] = &&ALU64_ADD_X, - [BPF_ALU64 | BPF_ADD | BPF_K] = &&ALU64_ADD_K, - [BPF_ALU64 | BPF_SUB | BPF_X] = &&ALU64_SUB_X, - [BPF_ALU64 | BPF_SUB | BPF_K] = &&ALU64_SUB_K, - [BPF_ALU64 | BPF_AND | BPF_X] = &&ALU64_AND_X, - [BPF_ALU64 | BPF_AND | BPF_K] = &&ALU64_AND_K, - [BPF_ALU64 | BPF_OR | BPF_X] = &&ALU64_OR_X, - [BPF_ALU64 | BPF_OR | BPF_K] = &&ALU64_OR_K, - [BPF_ALU64 | BPF_LSH | BPF_X] = &&ALU64_LSH_X, - [BPF_ALU64 | BPF_LSH | BPF_K] = &&ALU64_LSH_K, - [BPF_ALU64 | BPF_RSH | BPF_X] = &&ALU64_RSH_X, - [BPF_ALU64 | BPF_RSH | BPF_K] = &&ALU64_RSH_K, - [BPF_ALU64 | BPF_XOR | BPF_X] = &&ALU64_XOR_X, - [BPF_ALU64 | BPF_XOR | BPF_K] = &&ALU64_XOR_K, - [BPF_ALU64 | BPF_MUL | BPF_X] = &&ALU64_MUL_X, - [BPF_ALU64 | BPF_MUL | BPF_K] = &&ALU64_MUL_K, - [BPF_ALU64 | BPF_MOV | BPF_X] = &&ALU64_MOV_X, - [BPF_ALU64 | BPF_MOV | BPF_K] = &&ALU64_MOV_K, - [BPF_ALU64 | BPF_ARSH | BPF_X] = &&ALU64_ARSH_X, - [BPF_ALU64 | BPF_ARSH | BPF_K] = &&ALU64_ARSH_K, - [BPF_ALU64 | BPF_DIV | BPF_X] = &&ALU64_DIV_X, - [BPF_ALU64 | BPF_DIV | BPF_K] = &&ALU64_DIV_K, - [BPF_ALU64 | BPF_MOD | BPF_X] = &&ALU64_MOD_X, - [BPF_ALU64 | BPF_MOD | BPF_K] = &&ALU64_MOD_K, - [BPF_ALU64 | BPF_NEG] = &&ALU64_NEG, - /* Call instruction */ - [BPF_JMP | BPF_CALL] = &&JMP_CALL, - /* Jumps */ - [BPF_JMP | BPF_JA] = &&JMP_JA, - [BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X, - [BPF_JMP | BPF_JEQ | BPF_K] = &&JMP_JEQ_K, - [BPF_JMP | BPF_JNE | BPF_X] = &&JMP_JNE_X, - [BPF_JMP | BPF_JNE | BPF_K] = &&JMP_JNE_K, - [BPF_JMP | BPF_JGT | BPF_X] = &&JMP_JGT_X, - [BPF_JMP | BPF_JGT | BPF_K] = &&JMP_JGT_K, - [BPF_JMP | BPF_JGE | BPF_X] = &&JMP_JGE_X, - [BPF_JMP | BPF_JGE | BPF_K] = &&JMP_JGE_K, - [BPF_JMP | BPF_JSGT | BPF_X] = &&JMP_JSGT_X, - [BPF_JMP | BPF_JSGT | BPF_K] = &&JMP_JSGT_K, - [BPF_JMP | BPF_JSGE | BPF_X] = &&JMP_JSGE_X, - [BPF_JMP | BPF_JSGE | BPF_K] = &&JMP_JSGE_K, - [BPF_JMP | BPF_JSET | BPF_X] = &&JMP_JSET_X, - [BPF_JMP | BPF_JSET | BPF_K] = &&JMP_JSET_K, - /* Program return */ - [BPF_JMP | BPF_EXIT] = &&JMP_EXIT, - /* Store instructions */ - [BPF_STX | BPF_MEM | BPF_B] = &&STX_MEM_B, - [BPF_STX | BPF_MEM | BPF_H] = &&STX_MEM_H, - [BPF_STX | BPF_MEM | BPF_W] = &&STX_MEM_W, - [BPF_STX | BPF_MEM | BPF_DW] = &&STX_MEM_DW, - [BPF_STX | BPF_XADD | BPF_W] = &&STX_XADD_W, - [BPF_STX | BPF_XADD | BPF_DW] = &&STX_XADD_DW, - [BPF_ST | BPF_MEM | BPF_B] = 
&&ST_MEM_B, - [BPF_ST | BPF_MEM | BPF_H] = &&ST_MEM_H, - [BPF_ST | BPF_MEM | BPF_W] = &&ST_MEM_W, - [BPF_ST | BPF_MEM | BPF_DW] = &&ST_MEM_DW, - /* Load instructions */ - [BPF_LDX | BPF_MEM | BPF_B] = &&LDX_MEM_B, - [BPF_LDX | BPF_MEM | BPF_H] = &&LDX_MEM_H, - [BPF_LDX | BPF_MEM | BPF_W] = &&LDX_MEM_W, - [BPF_LDX | BPF_MEM | BPF_DW] = &&LDX_MEM_DW, - [BPF_LD | BPF_ABS | BPF_W] = &&LD_ABS_W, - [BPF_LD | BPF_ABS | BPF_H] = &&LD_ABS_H, - [BPF_LD | BPF_ABS | BPF_B] = &&LD_ABS_B, - [BPF_LD | BPF_IND | BPF_W] = &&LD_IND_W, - [BPF_LD | BPF_IND | BPF_H] = &&LD_IND_H, - [BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B, - }; - void *ptr; - int off; - -#define CONT ({ insn++; goto select_insn; }) -#define CONT_JMP ({ insn++; goto select_insn; }) - - FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; - ARG1 = (u64) (unsigned long) ctx; - - /* Register for user BPF programs need to be reset first. */ - regs[BPF_REG_A] = 0; - regs[BPF_REG_X] = 0; - -select_insn: - goto *jumptable[insn->code]; - - /* ALU */ -#define ALU(OPCODE, OP) \ - ALU64_##OPCODE##_X: \ - A = A OP X; \ - CONT; \ - ALU_##OPCODE##_X: \ - A = (u32) A OP (u32) X; \ - CONT; \ - ALU64_##OPCODE##_K: \ - A = A OP K; \ - CONT; \ - ALU_##OPCODE##_K: \ - A = (u32) A OP (u32) K; \ - CONT; - - ALU(ADD, +) - ALU(SUB, -) - ALU(AND, &) - ALU(OR, |) - ALU(LSH, <<) - ALU(RSH, >>) - ALU(XOR, ^) - ALU(MUL, *) -#undef ALU - ALU_NEG: - A = (u32) -A; - CONT; - ALU64_NEG: - A = -A; - CONT; - ALU_MOV_X: - A = (u32) X; - CONT; - ALU_MOV_K: - A = (u32) K; - CONT; - ALU64_MOV_X: - A = X; - CONT; - ALU64_MOV_K: - A = K; - CONT; - ALU64_ARSH_X: - (*(s64 *) &A) >>= X; - CONT; - ALU64_ARSH_K: - (*(s64 *) &A) >>= K; - CONT; - ALU64_MOD_X: - if (unlikely(X == 0)) - return 0; - tmp = A; - A = do_div(tmp, X); - CONT; - ALU_MOD_X: - if (unlikely(X == 0)) - return 0; - tmp = (u32) A; - A = do_div(tmp, (u32) X); - CONT; - ALU64_MOD_K: - tmp = A; - A = do_div(tmp, K); - CONT; - ALU_MOD_K: - tmp = (u32) A; - A = do_div(tmp, (u32) K); - CONT; - ALU64_DIV_X: - if (unlikely(X == 0)) - return 0; - do_div(A, X); - CONT; - ALU_DIV_X: - if (unlikely(X == 0)) - return 0; - tmp = (u32) A; - do_div(tmp, (u32) X); - A = (u32) tmp; - CONT; - ALU64_DIV_K: - do_div(A, K); - CONT; - ALU_DIV_K: - tmp = (u32) A; - do_div(tmp, (u32) K); - A = (u32) tmp; - CONT; - ALU_END_TO_BE: - switch (K) { - case 16: - A = (__force u16) cpu_to_be16(A); - break; - case 32: - A = (__force u32) cpu_to_be32(A); - break; - case 64: - A = (__force u64) cpu_to_be64(A); - break; - } - CONT; - ALU_END_TO_LE: - switch (K) { - case 16: - A = (__force u16) cpu_to_le16(A); - break; - case 32: - A = (__force u32) cpu_to_le32(A); - break; - case 64: - A = (__force u64) cpu_to_le64(A); - break; - } - CONT; - - /* CALL */ - JMP_CALL: - /* Function call scratches BPF_R1-BPF_R5 registers, - * preserves BPF_R6-BPF_R9, and stores return value - * into BPF_R0. 
- */ - BPF_R0 = (__bpf_call_base + insn->imm)(BPF_R1, BPF_R2, BPF_R3, - BPF_R4, BPF_R5); - CONT; - - /* JMP */ - JMP_JA: - insn += insn->off; - CONT; - JMP_JEQ_X: - if (A == X) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JEQ_K: - if (A == K) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JNE_X: - if (A != X) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JNE_K: - if (A != K) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JGT_X: - if (A > X) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JGT_K: - if (A > K) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JGE_X: - if (A >= X) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JGE_K: - if (A >= K) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JSGT_X: - if (((s64) A) > ((s64) X)) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JSGT_K: - if (((s64) A) > ((s64) K)) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JSGE_X: - if (((s64) A) >= ((s64) X)) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JSGE_K: - if (((s64) A) >= ((s64) K)) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JSET_X: - if (A & X) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JSET_K: - if (A & K) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_EXIT: - return BPF_R0; - - /* STX and ST and LDX*/ -#define LDST(SIZEOP, SIZE) \ - STX_MEM_##SIZEOP: \ - *(SIZE *)(unsigned long) (A + insn->off) = X; \ - CONT; \ - ST_MEM_##SIZEOP: \ - *(SIZE *)(unsigned long) (A + insn->off) = K; \ - CONT; \ - LDX_MEM_##SIZEOP: \ - A = *(SIZE *)(unsigned long) (X + insn->off); \ - CONT; - - LDST(B, u8) - LDST(H, u16) - LDST(W, u32) - LDST(DW, u64) -#undef LDST - STX_XADD_W: /* lock xadd *(u32 *)(A + insn->off) += X */ - atomic_add((u32) X, (atomic_t *)(unsigned long) - (A + insn->off)); - CONT; - STX_XADD_DW: /* lock xadd *(u64 *)(A + insn->off) += X */ - atomic64_add((u64) X, (atomic64_t *)(unsigned long) - (A + insn->off)); - CONT; - LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + K)) */ - off = K; -load_word: - /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are - * only appearing in the programs where ctx == - * skb. All programs keep 'ctx' in regs[BPF_REG_CTX] - * == BPF_R6, sk_convert_filter() saves it in BPF_R6, - * internal BPF verifier will check that BPF_R6 == - * ctx. - * - * BPF_ABS and BPF_IND are wrappers of function calls, - * so they scratch BPF_R1-BPF_R5 registers, preserve - * BPF_R6-BPF_R9, and store return value into BPF_R0. 
- * - * Implicit input: - * ctx - * - * Explicit input: - * X == any register - * K == 32-bit immediate - * - * Output: - * BPF_R0 - 8/16/32-bit skb data converted to cpu endianness - */ - - ptr = load_pointer((struct sk_buff *) ctx, off, 4, &tmp); - if (likely(ptr != NULL)) { - BPF_R0 = get_unaligned_be32(ptr); - CONT; - } - - return 0; - LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + K)) */ - off = K; -load_half: - ptr = load_pointer((struct sk_buff *) ctx, off, 2, &tmp); - if (likely(ptr != NULL)) { - BPF_R0 = get_unaligned_be16(ptr); - CONT; - } - - return 0; - LD_ABS_B: /* BPF_R0 = *(u8 *) (ctx + K) */ - off = K; -load_byte: - ptr = load_pointer((struct sk_buff *) ctx, off, 1, &tmp); - if (likely(ptr != NULL)) { - BPF_R0 = *(u8 *)ptr; - CONT; - } - - return 0; - LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + X + K)) */ - off = K + X; - goto load_word; - LD_IND_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + X + K)) */ - off = K + X; - goto load_half; - LD_IND_B: /* BPF_R0 = *(u8 *) (skb->data + X + K) */ - off = K + X; - goto load_byte; - - default_label: - /* If we ever reach this, we have a bug somewhere. */ - WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code); - return 0; -} - /* Helper to find the offset of pkt_type in sk_buff structure. We want * to make sure its still a 3bit field starting at a byte boundary; * taken from arch/x86/net/bpf_jit_comp.c. @@ -662,8 +169,8 @@ static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) return prandom_u32(); } -static bool convert_bpf_extensions(struct sock_filter *fp, - struct sock_filter_int **insnp) +bool sk_convert_bpf_extensions(struct sock_filter *fp, + struct sock_filter_int **insnp) { struct sock_filter_int *insn = *insnp; @@ -796,505 +303,6 @@ static bool convert_bpf_extensions(struct sock_filter *fp, return true; } -/** - * sk_convert_filter - convert filter program - * @prog: the user passed filter program - * @len: the length of the user passed filter program - * @new_prog: buffer where converted program will be stored - * @new_len: pointer to store length of converted program - * - * Remap 'sock_filter' style BPF instruction set to 'sock_filter_ext' style. - * Conversion workflow: - * - * 1) First pass for calculating the new program length: - * sk_convert_filter(old_prog, old_len, NULL, &new_len) - * - * 2) 2nd pass to remap in two passes: 1st pass finds new - * jump offsets, 2nd pass remapping: - * new_prog = kmalloc(sizeof(struct sock_filter_int) * new_len); - * sk_convert_filter(old_prog, old_len, new_prog, &new_len); - * - * User BPF's register A is mapped to our BPF register 6, user BPF - * register X is mapped to BPF register 7; frame pointer is always - * register 10; Context 'void *ctx' is stored in register 1, that is, - * for socket filters: ctx == 'struct sk_buff *', for seccomp: - * ctx == 'struct seccomp_data *'. 
- */ -int sk_convert_filter(struct sock_filter *prog, int len, - struct sock_filter_int *new_prog, int *new_len) -{ - int new_flen = 0, pass = 0, target, i; - struct sock_filter_int *new_insn; - struct sock_filter *fp; - int *addrs = NULL; - u8 bpf_src; - - BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK); - BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG); - - if (len <= 0 || len >= BPF_MAXINSNS) - return -EINVAL; - - if (new_prog) { - addrs = kzalloc(len * sizeof(*addrs), GFP_KERNEL); - if (!addrs) - return -ENOMEM; - } - -do_pass: - new_insn = new_prog; - fp = prog; - - if (new_insn) - *new_insn = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1); - new_insn++; - - for (i = 0; i < len; fp++, i++) { - struct sock_filter_int tmp_insns[6] = { }; - struct sock_filter_int *insn = tmp_insns; - - if (addrs) - addrs[i] = new_insn - new_prog; - - switch (fp->code) { - /* All arithmetic insns and skb loads map as-is. */ - case BPF_ALU | BPF_ADD | BPF_X: - case BPF_ALU | BPF_ADD | BPF_K: - case BPF_ALU | BPF_SUB | BPF_X: - case BPF_ALU | BPF_SUB | BPF_K: - case BPF_ALU | BPF_AND | BPF_X: - case BPF_ALU | BPF_AND | BPF_K: - case BPF_ALU | BPF_OR | BPF_X: - case BPF_ALU | BPF_OR | BPF_K: - case BPF_ALU | BPF_LSH | BPF_X: - case BPF_ALU | BPF_LSH | BPF_K: - case BPF_ALU | BPF_RSH | BPF_X: - case BPF_ALU | BPF_RSH | BPF_K: - case BPF_ALU | BPF_XOR | BPF_X: - case BPF_ALU | BPF_XOR | BPF_K: - case BPF_ALU | BPF_MUL | BPF_X: - case BPF_ALU | BPF_MUL | BPF_K: - case BPF_ALU | BPF_DIV | BPF_X: - case BPF_ALU | BPF_DIV | BPF_K: - case BPF_ALU | BPF_MOD | BPF_X: - case BPF_ALU | BPF_MOD | BPF_K: - case BPF_ALU | BPF_NEG: - case BPF_LD | BPF_ABS | BPF_W: - case BPF_LD | BPF_ABS | BPF_H: - case BPF_LD | BPF_ABS | BPF_B: - case BPF_LD | BPF_IND | BPF_W: - case BPF_LD | BPF_IND | BPF_H: - case BPF_LD | BPF_IND | BPF_B: - /* Check for overloaded BPF extension and - * directly convert it if found, otherwise - * just move on with mapping. - */ - if (BPF_CLASS(fp->code) == BPF_LD && - BPF_MODE(fp->code) == BPF_ABS && - convert_bpf_extensions(fp, &insn)) - break; - - *insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k); - break; - - /* Jump transformation cannot use BPF block macros - * everywhere as offset calculation and target updates - * require a bit more work than the rest, i.e. jump - * opcodes map as-is, but offsets need adjustment. - */ - -#define BPF_EMIT_JMP \ - do { \ - if (target >= len || target < 0) \ - goto err; \ - insn->off = addrs ? addrs[target] - addrs[i] - 1 : 0; \ - /* Adjust pc relative offset for 2nd or 3rd insn. */ \ - insn->off -= insn - tmp_insns; \ - } while (0) - - case BPF_JMP | BPF_JA: - target = i + fp->k + 1; - insn->code = fp->code; - BPF_EMIT_JMP; - break; - - case BPF_JMP | BPF_JEQ | BPF_K: - case BPF_JMP | BPF_JEQ | BPF_X: - case BPF_JMP | BPF_JSET | BPF_K: - case BPF_JMP | BPF_JSET | BPF_X: - case BPF_JMP | BPF_JGT | BPF_K: - case BPF_JMP | BPF_JGT | BPF_X: - case BPF_JMP | BPF_JGE | BPF_K: - case BPF_JMP | BPF_JGE | BPF_X: - if (BPF_SRC(fp->code) == BPF_K && (int) fp->k < 0) { - /* BPF immediates are signed, zero extend - * immediate into tmp register and use it - * in compare insn. - */ - *insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k); - - insn->a_reg = BPF_REG_A; - insn->x_reg = BPF_REG_TMP; - bpf_src = BPF_X; - } else { - insn->a_reg = BPF_REG_A; - insn->x_reg = BPF_REG_X; - insn->imm = fp->k; - bpf_src = BPF_SRC(fp->code); - } - - /* Common case where 'jump_false' is next insn. 
*/ - if (fp->jf == 0) { - insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src; - target = i + fp->jt + 1; - BPF_EMIT_JMP; - break; - } - - /* Convert JEQ into JNE when 'jump_true' is next insn. */ - if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) { - insn->code = BPF_JMP | BPF_JNE | bpf_src; - target = i + fp->jf + 1; - BPF_EMIT_JMP; - break; - } - - /* Other jumps are mapped into two insns: Jxx and JA. */ - target = i + fp->jt + 1; - insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src; - BPF_EMIT_JMP; - insn++; - - insn->code = BPF_JMP | BPF_JA; - target = i + fp->jf + 1; - BPF_EMIT_JMP; - break; - - /* ldxb 4 * ([14] & 0xf) is remaped into 6 insns. */ - case BPF_LDX | BPF_MSH | BPF_B: - /* tmp = A */ - *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_A); - /* A = BPF_R0 = *(u8 *) (skb->data + K) */ - *insn++ = BPF_LD_ABS(BPF_B, fp->k); - /* A &= 0xf */ - *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf); - /* A <<= 2 */ - *insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2); - /* X = A */ - *insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A); - /* A = tmp */ - *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP); - break; - - /* RET_K, RET_A are remaped into 2 insns. */ - case BPF_RET | BPF_A: - case BPF_RET | BPF_K: - *insn++ = BPF_MOV32_RAW(BPF_RVAL(fp->code) == BPF_K ? - BPF_K : BPF_X, BPF_REG_0, - BPF_REG_A, fp->k); - *insn = BPF_EXIT_INSN(); - break; - - /* Store to stack. */ - case BPF_ST: - case BPF_STX: - *insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) == - BPF_ST ? BPF_REG_A : BPF_REG_X, - -(BPF_MEMWORDS - fp->k) * 4); - break; - - /* Load from stack. */ - case BPF_LD | BPF_MEM: - case BPF_LDX | BPF_MEM: - *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ? - BPF_REG_A : BPF_REG_X, BPF_REG_FP, - -(BPF_MEMWORDS - fp->k) * 4); - break; - - /* A = K or X = K */ - case BPF_LD | BPF_IMM: - case BPF_LDX | BPF_IMM: - *insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ? - BPF_REG_A : BPF_REG_X, fp->k); - break; - - /* X = A */ - case BPF_MISC | BPF_TAX: - *insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A); - break; - - /* A = X */ - case BPF_MISC | BPF_TXA: - *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X); - break; - - /* A = skb->len or X = skb->len */ - case BPF_LD | BPF_W | BPF_LEN: - case BPF_LDX | BPF_W | BPF_LEN: - *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ? - BPF_REG_A : BPF_REG_X, BPF_REG_CTX, - offsetof(struct sk_buff, len)); - break; - - /* Access seccomp_data fields. */ - case BPF_LDX | BPF_ABS | BPF_W: - /* A = *(u32 *) (ctx + K) */ - *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k); - break; - - /* Unkown instruction. */ - default: - goto err; - } - - insn++; - if (new_prog) - memcpy(new_insn, tmp_insns, - sizeof(*insn) * (insn - tmp_insns)); - new_insn += insn - tmp_insns; - } - - if (!new_prog) { - /* Only calculating new length. */ - *new_len = new_insn - new_prog; - return 0; - } - - pass++; - if (new_flen != new_insn - new_prog) { - new_flen = new_insn - new_prog; - if (pass > 2) - goto err; - goto do_pass; - } - - kfree(addrs); - BUG_ON(*new_len != new_flen); - return 0; -err: - kfree(addrs); - return -EINVAL; -} - -/* Security: - * - * A BPF program is able to use 16 cells of memory to store intermediate - * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter()). - * - * As we dont want to clear mem[] array for each packet going through - * sk_run_filter(), we check that filter loaded by user never try to read - * a cell if not previously written, and we check all branches to be sure - * a malicious user doesn't try to abuse us. 
- */ -static int check_load_and_stores(struct sock_filter *filter, int flen) -{ - u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */ - int pc, ret = 0; - - BUILD_BUG_ON(BPF_MEMWORDS > 16); - - masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL); - if (!masks) - return -ENOMEM; - - memset(masks, 0xff, flen * sizeof(*masks)); - - for (pc = 0; pc < flen; pc++) { - memvalid &= masks[pc]; - - switch (filter[pc].code) { - case BPF_ST: - case BPF_STX: - memvalid |= (1 << filter[pc].k); - break; - case BPF_LD | BPF_MEM: - case BPF_LDX | BPF_MEM: - if (!(memvalid & (1 << filter[pc].k))) { - ret = -EINVAL; - goto error; - } - break; - case BPF_JMP | BPF_JA: - /* A jump must set masks on target */ - masks[pc + 1 + filter[pc].k] &= memvalid; - memvalid = ~0; - break; - case BPF_JMP | BPF_JEQ | BPF_K: - case BPF_JMP | BPF_JEQ | BPF_X: - case BPF_JMP | BPF_JGE | BPF_K: - case BPF_JMP | BPF_JGE | BPF_X: - case BPF_JMP | BPF_JGT | BPF_K: - case BPF_JMP | BPF_JGT | BPF_X: - case BPF_JMP | BPF_JSET | BPF_K: - case BPF_JMP | BPF_JSET | BPF_X: - /* A jump must set masks on targets */ - masks[pc + 1 + filter[pc].jt] &= memvalid; - masks[pc + 1 + filter[pc].jf] &= memvalid; - memvalid = ~0; - break; - } - } -error: - kfree(masks); - return ret; -} - -static bool chk_code_allowed(u16 code_to_probe) -{ - static const bool codes[] = { - /* 32 bit ALU operations */ - [BPF_ALU | BPF_ADD | BPF_K] = true, - [BPF_ALU | BPF_ADD | BPF_X] = true, - [BPF_ALU | BPF_SUB | BPF_K] = true, - [BPF_ALU | BPF_SUB | BPF_X] = true, - [BPF_ALU | BPF_MUL | BPF_K] = true, - [BPF_ALU | BPF_MUL | BPF_X] = true, - [BPF_ALU | BPF_DIV | BPF_K] = true, - [BPF_ALU | BPF_DIV | BPF_X] = true, - [BPF_ALU | BPF_MOD | BPF_K] = true, - [BPF_ALU | BPF_MOD | BPF_X] = true, - [BPF_ALU | BPF_AND | BPF_K] = true, - [BPF_ALU | BPF_AND | BPF_X] = true, - [BPF_ALU | BPF_OR | BPF_K] = true, - [BPF_ALU | BPF_OR | BPF_X] = true, - [BPF_ALU | BPF_XOR | BPF_K] = true, - [BPF_ALU | BPF_XOR | BPF_X] = true, - [BPF_ALU | BPF_LSH | BPF_K] = true, - [BPF_ALU | BPF_LSH | BPF_X] = true, - [BPF_ALU | BPF_RSH | BPF_K] = true, - [BPF_ALU | BPF_RSH | BPF_X] = true, - [BPF_ALU | BPF_NEG] = true, - /* Load instructions */ - [BPF_LD | BPF_W | BPF_ABS] = true, - [BPF_LD | BPF_H | BPF_ABS] = true, - [BPF_LD | BPF_B | BPF_ABS] = true, - [BPF_LD | BPF_W | BPF_LEN] = true, - [BPF_LD | BPF_W | BPF_IND] = true, - [BPF_LD | BPF_H | BPF_IND] = true, - [BPF_LD | BPF_B | BPF_IND] = true, - [BPF_LD | BPF_IMM] = true, - [BPF_LD | BPF_MEM] = true, - [BPF_LDX | BPF_W | BPF_LEN] = true, - [BPF_LDX | BPF_B | BPF_MSH] = true, - [BPF_LDX | BPF_IMM] = true, - [BPF_LDX | BPF_MEM] = true, - /* Store instructions */ - [BPF_ST] = true, - [BPF_STX] = true, - /* Misc instructions */ - [BPF_MISC | BPF_TAX] = true, - [BPF_MISC | BPF_TXA] = true, - /* Return instructions */ - [BPF_RET | BPF_K] = true, - [BPF_RET | BPF_A] = true, - /* Jump instructions */ - [BPF_JMP | BPF_JA] = true, - [BPF_JMP | BPF_JEQ | BPF_K] = true, - [BPF_JMP | BPF_JEQ | BPF_X] = true, - [BPF_JMP | BPF_JGE | BPF_K] = true, - [BPF_JMP | BPF_JGE | BPF_X] = true, - [BPF_JMP | BPF_JGT | BPF_K] = true, - [BPF_JMP | BPF_JGT | BPF_X] = true, - [BPF_JMP | BPF_JSET | BPF_K] = true, - [BPF_JMP | BPF_JSET | BPF_X] = true, - }; - - if (code_to_probe >= ARRAY_SIZE(codes)) - return false; - - return codes[code_to_probe]; -} - -/** - * sk_chk_filter - verify socket filter code - * @filter: filter to verify - * @flen: length of filter - * - * Check the user's filter code. If we let some ugly - * filter code slip through kaboom! 
The filter must contain - * no references or jumps that are out of range, no illegal - * instructions, and must end with a RET instruction. - * - * All jumps are forward as they are not signed. - * - * Returns 0 if the rule set is legal or -EINVAL if not. - */ -int sk_chk_filter(struct sock_filter *filter, unsigned int flen) -{ - bool anc_found; - int pc; - - if (flen == 0 || flen > BPF_MAXINSNS) - return -EINVAL; - - /* Check the filter code now */ - for (pc = 0; pc < flen; pc++) { - struct sock_filter *ftest = &filter[pc]; - - /* May we actually operate on this code? */ - if (!chk_code_allowed(ftest->code)) - return -EINVAL; - - /* Some instructions need special checks */ - switch (ftest->code) { - case BPF_ALU | BPF_DIV | BPF_K: - case BPF_ALU | BPF_MOD | BPF_K: - /* Check for division by zero */ - if (ftest->k == 0) - return -EINVAL; - break; - case BPF_LD | BPF_MEM: - case BPF_LDX | BPF_MEM: - case BPF_ST: - case BPF_STX: - /* Check for invalid memory addresses */ - if (ftest->k >= BPF_MEMWORDS) - return -EINVAL; - break; - case BPF_JMP | BPF_JA: - /* Note, the large ftest->k might cause loops. - * Compare this with conditional jumps below, - * where offsets are limited. --ANK (981016) - */ - if (ftest->k >= (unsigned int)(flen - pc - 1)) - return -EINVAL; - break; - case BPF_JMP | BPF_JEQ | BPF_K: - case BPF_JMP | BPF_JEQ | BPF_X: - case BPF_JMP | BPF_JGE | BPF_K: - case BPF_JMP | BPF_JGE | BPF_X: - case BPF_JMP | BPF_JGT | BPF_K: - case BPF_JMP | BPF_JGT | BPF_X: - case BPF_JMP | BPF_JSET | BPF_K: - case BPF_JMP | BPF_JSET | BPF_X: - /* Both conditionals must be safe */ - if (pc + ftest->jt + 1 >= flen || - pc + ftest->jf + 1 >= flen) - return -EINVAL; - break; - case BPF_LD | BPF_W | BPF_ABS: - case BPF_LD | BPF_H | BPF_ABS: - case BPF_LD | BPF_B | BPF_ABS: - anc_found = false; - if (bpf_anc_helper(ftest) & BPF_ANC) - anc_found = true; - /* Ancillary operation unknown or unsupported */ - if (anc_found == false && ftest->k >= SKF_AD_OFF) - return -EINVAL; - } - } - - /* Last instruction must be a RET code */ - switch (filter[flen - 1].code) { - case BPF_RET | BPF_K: - case BPF_RET | BPF_A: - return check_load_and_stores(filter, flen); - } - - return -EINVAL; -} -EXPORT_SYMBOL(sk_chk_filter); - static int sk_store_orig_filter(struct sk_filter *fp, const struct sock_fprog *fprog) { @@ -1456,33 +464,6 @@ out_err: return ERR_PTR(err); } -void __weak bpf_int_jit_compile(struct sk_filter *prog) -{ -} - -/** - * sk_filter_select_runtime - select execution runtime for BPF program - * @fp: sk_filter populated with internal BPF program - * - * try to JIT internal BPF program, if JIT is not available select interpreter - * BPF program will be executed via SK_RUN_FILTER() macro - */ -void sk_filter_select_runtime(struct sk_filter *fp) -{ - fp->bpf_func = (void *) __sk_run_filter; - - /* Probe if internal BPF can be JITed */ - bpf_int_jit_compile(fp); -} -EXPORT_SYMBOL_GPL(sk_filter_select_runtime); - -/* free internal BPF program */ -void sk_filter_free(struct sk_filter *fp) -{ - bpf_jit_free(fp); -} -EXPORT_SYMBOL_GPL(sk_filter_free); - static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp, struct sock *sk) { -- 1.7.9.5
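
A quick illustration for reviewers who have not used the converter before: the sk_convert_filter() kerneldoc that moves into kernel/bpf/core.c describes a two-pass calling convention. The sketch below is illustrative only and is not part of the patch; the wrapper name is made up and error handling is trimmed.

#include <linux/filter.h>
#include <linux/slab.h>

static struct sock_filter_int *convert_classic(struct sock_filter *old_prog,
					       int old_len, int *new_len)
{
	struct sock_filter_int *new_prog;

	/* Pass 1: with new_prog == NULL only the new program length is computed. */
	if (sk_convert_filter(old_prog, old_len, NULL, new_len))
		return NULL;

	new_prog = kmalloc(*new_len * sizeof(struct sock_filter_int), GFP_KERNEL);
	if (!new_prog)
		return NULL;

	/* Pass 2: emit the remapped internal BPF instructions. */
	if (sk_convert_filter(old_prog, old_len, new_prog, new_len)) {
		kfree(new_prog);
		return NULL;
	}

	return new_prog;
}

Nothing about that convention changes with this patch; only the file the implementation lives in does.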
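
Similarly, a minimal, hypothetical in-kernel user of the unattached-filter API declared in the include/linux/filter.h hunk would exercise the split code roughly as follows (sketch only; sk_unattached_filter_destroy() is assumed here as the existing counterpart of _create() and is not touched by this diff):

#include <linux/filter.h>
#include <linux/skbuff.h>

static int classic_filter_once(struct sock_filter *insns, unsigned short len,
			       struct sk_buff *skb, unsigned int *res)
{
	struct sock_fprog_kern fprog = {
		.len	= len,		/* number of classic BPF insns */
		.filter	= insns,
	};
	struct sk_filter *fp;
	int err;

	/* classic program is checked, converted and a runtime is selected here */
	err = sk_unattached_filter_create(&fp, &fprog);
	if (err)
		return err;

	*res = SK_RUN_FILTER(fp, skb);	/* dispatches via fp->bpf_func */

	sk_unattached_filter_destroy(fp);	/* assumed counterpart, see note above */
	return 0;
}

Since verification, conversion and runtime selection all still happen behind sk_unattached_filter_create(), callers like this are unaffected by the file split.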