Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754714AbaF1AGv (ORCPT ); Fri, 27 Jun 2014 20:06:51 -0400 Received: from mail-pa0-f52.google.com ([209.85.220.52]:49585 "EHLO mail-pa0-f52.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754669AbaF1AGp (ORCPT ); Fri, 27 Jun 2014 20:06:45 -0400 From: Alexei Starovoitov To: "David S. Miller" Cc: Ingo Molnar , Linus Torvalds , Steven Rostedt , Daniel Borkmann , Chema Gonzalez , Eric Dumazet , Peter Zijlstra , Arnaldo Carvalho de Melo , Jiri Olsa , Thomas Gleixner , "H. Peter Anvin" , Andrew Morton , Kees Cook , linux-api@vger.kernel.org, netdev@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [PATCH RFC net-next 10/14] net: sock: allow eBPF programs to be attached to sockets Date: Fri, 27 Jun 2014 17:06:02 -0700 Message-Id: <1403913966-4927-11-git-send-email-ast@plumgrid.com> X-Mailer: git-send-email 1.7.9.5 In-Reply-To: <1403913966-4927-1-git-send-email-ast@plumgrid.com> References: <1403913966-4927-1-git-send-email-ast@plumgrid.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Introduce a new setsockopt() command: int prog_id; setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER_EBPF, &prog_id, sizeof(prog_id)) where prog_id is the id of an eBPF program previously loaded via: prog_id = syscall(__NR_bpf, BPF_PROG_LOAD, 0, BPF_PROG_TYPE_SOCKET_FILTER, &prog, sizeof(prog)); setsockopt() calls bpf_prog_get(), which increments the refcnt of the program so that it doesn't get unloaded while the socket is using it. The same eBPF program can be attached to different sockets. 
Program exit automatically closes the socket, which calls sk_filter_uncharge(), which in turn decrements the refcnt of the eBPF program. Signed-off-by: Alexei Starovoitov --- arch/alpha/include/uapi/asm/socket.h | 2 + arch/avr32/include/uapi/asm/socket.h | 2 + arch/cris/include/uapi/asm/socket.h | 2 + arch/frv/include/uapi/asm/socket.h | 2 + arch/ia64/include/uapi/asm/socket.h | 2 + arch/m32r/include/uapi/asm/socket.h | 2 + arch/mips/include/uapi/asm/socket.h | 2 + arch/mn10300/include/uapi/asm/socket.h | 2 + arch/parisc/include/uapi/asm/socket.h | 2 + arch/powerpc/include/uapi/asm/socket.h | 2 + arch/s390/include/uapi/asm/socket.h | 2 + arch/sparc/include/uapi/asm/socket.h | 2 + arch/xtensa/include/uapi/asm/socket.h | 2 + include/linux/filter.h | 1 + include/uapi/asm-generic/socket.h | 2 + net/core/filter.c | 117 ++++++++++++++++++++++++++++++++ net/core/sock.c | 13 ++++ 17 files changed, 159 insertions(+) diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 3de1394bcab8..8c83c376b5ba 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -87,4 +87,6 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_ATTACH_FILTER_EBPF 49 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h index 6e6cd159924b..498ef7220466 100644 --- a/arch/avr32/include/uapi/asm/socket.h +++ b/arch/avr32/include/uapi/asm/socket.h @@ -80,4 +80,6 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_ATTACH_FILTER_EBPF 49 + #endif /* _UAPI__ASM_AVR32_SOCKET_H */ diff --git a/arch/cris/include/uapi/asm/socket.h b/arch/cris/include/uapi/asm/socket.h index ed94e5ed0a23..0d5120724780 100644 --- a/arch/cris/include/uapi/asm/socket.h +++ b/arch/cris/include/uapi/asm/socket.h @@ -82,6 +82,8 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_ATTACH_FILTER_EBPF 49 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h index 
ca2c6e6f31c6..81fba267c285 100644 --- a/arch/frv/include/uapi/asm/socket.h +++ b/arch/frv/include/uapi/asm/socket.h @@ -80,5 +80,7 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_ATTACH_FILTER_EBPF 49 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h index a1b49bac7951..9cbb2e82fa7c 100644 --- a/arch/ia64/include/uapi/asm/socket.h +++ b/arch/ia64/include/uapi/asm/socket.h @@ -89,4 +89,6 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_ATTACH_FILTER_EBPF 49 + #endif /* _ASM_IA64_SOCKET_H */ diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h index 6c9a24b3aefa..587ac2fb4106 100644 --- a/arch/m32r/include/uapi/asm/socket.h +++ b/arch/m32r/include/uapi/asm/socket.h @@ -80,4 +80,6 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_ATTACH_FILTER_EBPF 49 + #endif /* _ASM_M32R_SOCKET_H */ diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index a14baa218c76..ab1aed2306db 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -98,4 +98,6 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_ATTACH_FILTER_EBPF 49 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h index 6aa3ce1854aa..1c4f916d0ef1 100644 --- a/arch/mn10300/include/uapi/asm/socket.h +++ b/arch/mn10300/include/uapi/asm/socket.h @@ -80,4 +80,6 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_ATTACH_FILTER_EBPF 49 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index fe35ceacf0e7..d189bb79ca07 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -79,4 +79,6 @@ #define SO_BPF_EXTENSIONS 0x4029 +#define SO_ATTACH_FILTER_EBPF 0x402a + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h index 
a9c3e2e18c05..88488f24ae7f 100644 --- a/arch/powerpc/include/uapi/asm/socket.h +++ b/arch/powerpc/include/uapi/asm/socket.h @@ -87,4 +87,6 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_ATTACH_FILTER_EBPF 49 + #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h index e031332096d7..c5f26af90366 100644 --- a/arch/s390/include/uapi/asm/socket.h +++ b/arch/s390/include/uapi/asm/socket.h @@ -86,4 +86,6 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_ATTACH_FILTER_EBPF 49 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index 54d9608681b6..667ed3fa63f2 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -76,6 +76,8 @@ #define SO_BPF_EXTENSIONS 0x0032 +#define SO_ATTACH_FILTER_EBPF 0x0033 + /* Security levels - as per NRL IPv6 - don't actually do anything */ #define SO_SECURITY_AUTHENTICATION 0x5001 #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h index 39acec0cf0b1..24f3e4434979 100644 --- a/arch/xtensa/include/uapi/asm/socket.h +++ b/arch/xtensa/include/uapi/asm/socket.h @@ -91,4 +91,6 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_ATTACH_FILTER_EBPF 49 + #endif /* _XTENSA_SOCKET_H */ diff --git a/include/linux/filter.h b/include/linux/filter.h index 9873cc8fd31b..7412cfce84f9 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -72,6 +72,7 @@ int sk_unattached_filter_create(struct sk_filter **pfp, void sk_unattached_filter_destroy(struct sk_filter *fp); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); +int sk_attach_filter_ebpf(u32 prog_id, struct sock *sk); int sk_detach_filter(struct sock *sk); int sk_chk_filter(struct sock_filter *filter, unsigned int flen); diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index ea0796bdcf88..f41844e9ac07 
100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -82,4 +82,6 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_ATTACH_FILTER_EBPF 49 + #endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/net/core/filter.c b/net/core/filter.c index 7f7c61b4aa39..11a54295f693 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -44,6 +44,7 @@ #include #include #include +#include /** * sk_filter - run a packet through a socket filter @@ -1117,6 +1118,122 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) } EXPORT_SYMBOL_GPL(sk_attach_filter); +int sk_attach_filter_ebpf(u32 prog_id, struct sock *sk) +{ + struct sk_filter *fp, *old_fp; + + if (sock_flag(sk, SOCK_FILTER_LOCKED)) + return -EPERM; + + fp = bpf_prog_get(prog_id); + if (!fp) + return -EINVAL; + + if (fp->info->prog_type != BPF_PROG_TYPE_SOCKET_FILTER) { + /* valid prog_id, but invalid filter type */ + sk_filter_release(fp); + return -EINVAL; + } + + old_fp = rcu_dereference_protected(sk->sk_filter, + sock_owned_by_user(sk)); + rcu_assign_pointer(sk->sk_filter, fp); + + if (old_fp) + sk_filter_uncharge(sk, old_fp); + + return 0; +} + +static struct bpf_func_proto sock_filter_funcs[] = { + [BPF_FUNC_map_lookup_elem] = { + .ret_type = PTR_TO_MAP_CONDITIONAL, + .arg1_type = CONST_ARG_MAP_ID, + .arg2_type = PTR_TO_STACK_IMM_MAP_KEY, + }, + [BPF_FUNC_map_update_elem] = { + .ret_type = RET_INTEGER, + .arg1_type = CONST_ARG_MAP_ID, + .arg2_type = PTR_TO_STACK_IMM_MAP_KEY, + .arg3_type = PTR_TO_STACK_IMM_MAP_VALUE, + }, + [BPF_FUNC_map_delete_elem] = { + .ret_type = RET_INTEGER, + .arg1_type = CONST_ARG_MAP_ID, + .arg2_type = PTR_TO_STACK_IMM_MAP_KEY, + .arg3_type = PTR_TO_STACK_IMM_MAP_VALUE, + }, +}; + +/* allow socket filters to call + * bpf_map_lookup_elem(), bpf_map_update_elem(), bpf_map_delete_elem() + */ +static const struct bpf_func_proto *sock_filter_func_proto(enum bpf_func_id func_id) +{ + if (func_id < 0 || func_id >= ARRAY_SIZE(sock_filter_funcs)) + return NULL; + 
return &sock_filter_funcs[func_id]; +} + +static const struct bpf_context_access { + int size; + enum bpf_access_type type; +} sock_filter_ctx_access[] = { + [offsetof(struct sk_buff, mark)] = { + FIELD_SIZEOF(struct sk_buff, mark), BPF_READ + }, + [offsetof(struct sk_buff, protocol)] = { + FIELD_SIZEOF(struct sk_buff, protocol), BPF_READ + }, + [offsetof(struct sk_buff, queue_mapping)] = { + FIELD_SIZEOF(struct sk_buff, queue_mapping), BPF_READ + }, +}; + +/* allow socket filters to access to 'mark', 'protocol' and 'queue_mapping' + * fields of 'struct sk_buff' + */ +static bool sock_filter_is_valid_access(int off, int size, enum bpf_access_type type) +{ + const struct bpf_context_access *access; + + if (off < 0 || off >= ARRAY_SIZE(sock_filter_ctx_access)) + return false; + + access = &sock_filter_ctx_access[off]; + if (access->size == size && (access->type & type)) + return true; + + return false; +} + +static struct bpf_verifier_ops sock_filter_ops = { + .get_func_proto = sock_filter_func_proto, + .is_valid_access = sock_filter_is_valid_access, +}; + +static struct bpf_prog_type_list tl = { + .ops = &sock_filter_ops, + .type = BPF_PROG_TYPE_SOCKET_FILTER, +}; + +static int __init register_sock_filter_ops(void) +{ + /* init function offsets used to convert BPF_FUNC_* constants in + * BPF_CALL instructions to offset of helper functions + */ + sock_filter_funcs[BPF_FUNC_map_lookup_elem].func_off = + bpf_map_lookup_elem - __bpf_call_base; + sock_filter_funcs[BPF_FUNC_map_update_elem].func_off = + bpf_map_update_elem - __bpf_call_base; + sock_filter_funcs[BPF_FUNC_map_delete_elem].func_off = + bpf_map_delete_elem - __bpf_call_base; + + bpf_register_prog_type(&tl); + return 0; +} +late_initcall(register_sock_filter_ops); + int sk_detach_filter(struct sock *sk) { int ret = -ENOENT; diff --git a/net/core/sock.c b/net/core/sock.c index 026e01f70274..2f9f7b74a551 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -895,6 +895,19 @@ set_rcvbuf: } break; + case 
SO_ATTACH_FILTER_EBPF: + ret = -EINVAL; + if (optlen == sizeof(u32)) { + u32 prog_id; + + ret = -EFAULT; + if (copy_from_user(&prog_id, optval, sizeof(prog_id))) + break; + + ret = sk_attach_filter_ebpf(prog_id, sk); + } + break; + case SO_DETACH_FILTER: ret = sk_detach_filter(sk); break; -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/