Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S966782AbbLQF1L (ORCPT ); Thu, 17 Dec 2015 00:27:11 -0500 Received: from szxga03-in.huawei.com ([119.145.14.66]:12734 "EHLO szxga03-in.huawei.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753719AbbLQF1H (ORCPT ); Thu, 17 Dec 2015 00:27:07 -0500 From: Wang Nan To: , , , , , , , , , , , CC: , , Wang Nan Subject: [PATCH 08/10] bpf samples: Add utils.[ch] for using BPF Date: Thu, 17 Dec 2015 05:23:12 +0000 Message-ID: <1450329794-161948-9-git-send-email-wangnan0@huawei.com> X-Mailer: git-send-email 1.8.3.4 In-Reply-To: <1450329794-161948-1-git-send-email-wangnan0@huawei.com> References: <1450329794-161948-1-git-send-email-wangnan0@huawei.com> MIME-Version: 1.0 Content-Type: text/plain X-Originating-IP: [10.107.193.248] X-CFilter-Loop: Reflected X-Mirapoint-Virus-RAPID-Raw: score=unknown(0), refid=str=0001.0A020205.567246D9.0011,ss=1,re=0.000,recu=0.000,reip=0.000,cl=1,cld=1,fgs=0, ip=0.0.0.0, so=2013-05-26 15:14:31, dmn=2013-03-21 17:37:32 X-Mirapoint-Loop-Id: 8d32cb9e443444bc37cfe461584d3795 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 15818 Lines: 603 We are going to use libbpf to replace the old libbpf.[ch] and bpf_load.[ch]. This is the first patch of this work. In this patch, several macros and helpers in libbpf.[ch] and bpf_load.[ch] are merged into utils.[ch]. utils.[ch] utilizes libbpf in tools/lib to deal with BPF related things. They would be compiled after Makefile changes. Signed-off-by: Wang Nan Cc: Alexei Starovoitov Cc: Alex Gartrell Cc: Arnaldo Carvalho de Melo Cc: Brenden Blanco Cc: Daniel Borkmann Cc: Daniel Wagner Cc: David S. 
Miller Cc: Ingo Molnar Cc: Kaixu Xia Cc: Michael Holzheu Cc: Yang Shi --- samples/bpf/include/linux/err.h | 56 ++++++++ samples/bpf/utils.c | 276 ++++++++++++++++++++++++++++++++++++++++ samples/bpf/utils.h | 217 +++++++++++++++++++++++++++++++ 3 files changed, 549 insertions(+) create mode 100644 samples/bpf/include/linux/err.h create mode 100644 samples/bpf/utils.c create mode 100644 samples/bpf/utils.h diff --git a/samples/bpf/include/linux/err.h b/samples/bpf/include/linux/err.h new file mode 100644 index 0000000..671b874 --- /dev/null +++ b/samples/bpf/include/linux/err.h @@ -0,0 +1,56 @@ +#ifndef __TOOLS_LINUX_ERR_H +#define __TOOLS_LINUX_ERR_H + +#include + +#ifndef __must_check +# define __must_check +#endif +#ifndef __force +# define __force +#endif +#ifndef unlikely +# define unlikely(x) x +#endif + +/* + * Original kernel header comment: + * + * Kernel pointers have redundant information, so we can use a + * scheme where we can return either an error code or a normal + * pointer with the same return value. + * + * This should be a per-architecture thing, to allow different + * error and pointer decisions. + * + * Userspace note: + * The same principle works for userspace, because 'error' pointers + * fall down to the unused hole far from user space, as described + * in Documentation/x86/x86_64/mm.txt for x86_64 arch: + * + * 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm hole caused by [48:63] sign extension + * ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole + * + * It should be the same case for other architectures, because + * this code is used in generic kernel code. 
+ */ +#define MAX_ERRNO 4095 + +#define IS_ERR_VALUE(x) unlikely((x) >= (unsigned long)-MAX_ERRNO) + +static inline void * __must_check ERR_PTR(long error_) +{ + return (void *) error_; +} + +static inline long __must_check PTR_ERR(__force const void *ptr) +{ + return (long) ptr; +} + +static inline bool __must_check IS_ERR(__force const void *ptr) +{ + return IS_ERR_VALUE((unsigned long)ptr); +} + +#endif /* _LINUX_ERR_H */ diff --git a/samples/bpf/utils.c b/samples/bpf/utils.c new file mode 100644 index 0000000..73262a9 --- /dev/null +++ b/samples/bpf/utils.c @@ -0,0 +1,276 @@ +/* eBPF mini library */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "utils.h" + +#define DEBUGFS "/sys/kernel/debug/tracing/" + +int open_raw_sock(const char *name) +{ + struct sockaddr_ll sll; + int sock; + + sock = socket(PF_PACKET, SOCK_RAW | SOCK_NONBLOCK | SOCK_CLOEXEC, htons(ETH_P_ALL)); + if (sock < 0) { + printf("cannot create raw socket\n"); + return -1; + } + + memset(&sll, 0, sizeof(sll)); + sll.sll_family = AF_PACKET; + sll.sll_ifindex = if_nametoindex(name); + sll.sll_protocol = htons(ETH_P_ALL); + if (bind(sock, (struct sockaddr *)&sll, sizeof(sll)) < 0) { + printf("bind to %s: %s\n", name, strerror(errno)); + close(sock); + return -1; + } + + return sock; +} + +void read_trace_pipe(void) +{ + int trace_fd; + + trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0); + if (trace_fd < 0) + return; + + while (1) { + static char buf[4096]; + ssize_t sz; + + sz = read(trace_fd, buf, sizeof(buf)); + if (sz > 0) { + buf[sz] = 0; + puts(buf); + } + } +} + +int perf_event_open(struct perf_event_attr *attr, int pid, int cpu, + int group_fd, unsigned long flags) +{ + return syscall(__NR_perf_event_open, attr, pid, cpu, + group_fd, flags); +} + +static int prog_load_prep(struct bpf_program *prog, int n, + struct bpf_insn *insns, int insns_cnt, + struct 
bpf_prog_prep_result *res) +{ + enum bpf_prog_type prog_type; + int is_socket, is_kprobe, is_kretprobe; + const char *event = bpf_program__title(prog, false); + + LIBBPF_PTR_ASSERT(event, return -1); + + is_socket = strncmp(event, "socket", 6) == 0; + is_kprobe = strncmp(event, "kprobe/", 7) == 0; + is_kretprobe = strncmp(event, "kretprobe/", 10) == 0; + + if (is_socket) { + prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + } else if (is_kprobe || is_kretprobe) { + prog_type = BPF_PROG_TYPE_KPROBE; + } else { + fprintf(stderr, "Unknown event '%s'\n", event); + return -1; + } + + LIBBPF_ASSERT(bpf_program__set_type(prog, prog_type), return -1); + res->new_insn_ptr = insns; + res->new_insn_cnt = insns_cnt; + return 0; +} + +static int populate_prog_array(int map_fd, struct bpf_object *obj) +{ + struct bpf_program *prog; + + if (map_fd < 0) { + fprintf(stderr, "Invalid map fd\n"); + return -1; + } + + bpf_object__for_each_program(prog, obj) { + const char *event = bpf_program__title(prog, false); + int ind, prog_fd; + const char *ptr; + + LIBBPF_PTR_ASSERT(event, return -1); + ptr = event + strlen(event) - 1; + while (isdigit(*ptr)) + ptr--; + ptr++; + if (!isdigit(*ptr)) { + fprintf(stderr, "Invalid event: %s\n", event); + return -1; + } + + ind = atoi(ptr); + + __LIBBPF_ASSERT(prog_fd = bpf_program__nth_fd(prog, 0), + >= 0, return -1); + LIBBPF_ASSERT(bpf_map_update_elem(map_fd, &ind, + &prog_fd, BPF_ANY), + return -1); + } + return 0; +} + +static int create_kprobes(int fd, const char *event, bool is_kprobe) +{ + char buf[256]; + int efd, err, id; + struct perf_event_attr attr = {}; + + if (isdigit(event[0])) + return 0; + + attr.type = PERF_TYPE_TRACEPOINT; + attr.sample_type = PERF_SAMPLE_RAW; + attr.sample_period = 1; + attr.wakeup_events = 1; + + snprintf(buf, sizeof(buf), + "echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events", + is_kprobe ? 
'p' : 'r', event, event); + + err = system(buf); + if (err < 0) { + fprintf(stderr, "failed to create kprobe '%s' error '%s'\n", + event, strerror(errno)); + return -1; + } + + strcpy(buf, DEBUGFS); + strcat(buf, "events/kprobes/"); + strcat(buf, event); + strcat(buf, "/id"); + + efd = open(buf, O_RDONLY, 0); + if (efd < 0) { + fprintf(stderr, "failed to open event %s\n", event); + return -1; + } + + err = read(efd, buf, sizeof(buf)); + if (err < 0 || err >= sizeof(buf)) { + fprintf(stderr, "read from '%s' failed '%s'\n", + event, strerror(errno)); + return -1; + } + + close(efd); + + buf[err] = 0; + id = atoi(buf); + attr.config = id; + + efd = perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0); + if (efd < 0) { + fprintf(stderr, "event %d fd %d err %s\n", id, efd, + strerror(errno)); + return -1; + } + + ioctl(efd, PERF_EVENT_IOC_ENABLE, 0); + ioctl(efd, PERF_EVENT_IOC_SET_BPF, fd); + return 0; +} + +struct bpf_object *load_bpf_file(char *path) +{ + struct bpf_program *prog; + struct bpf_object *obj; + struct bpf_map *map; + int err; + + /* clear all kprobes */ + err = system("echo \"\" > /sys/kernel/debug/tracing/kprobe_events"); + if (err) + fprintf(stderr, "WARNING: clear kprobe_events failed: %s\n", strerror(errno)); + + LIBBPF_PTR_ASSERT(obj = bpf_object__open(path), return NULL); + + bpf_object__for_each_program(prog, obj) + LIBBPF_ASSERT(bpf_program__set_prep(prog, 1, prog_load_prep), + goto errout); + + LIBBPF_ASSERT(bpf_object__load(obj), goto errout); + + bpf_map__for_each(map, obj) { + struct bpf_map_def def; + + LIBBPF_ASSERT(bpf_map__get_def(map, &def), goto errout); + if (def.type == BPF_MAP_TYPE_PROG_ARRAY) { + if (populate_prog_array(bpf_map__get_fd(map), obj)) { + fprintf(stderr, "failed to populate program array\n"); + goto errout; + } + } + } + + bpf_object__for_each_program(prog, obj) { + const char *event = bpf_program__title(prog, false); + int fd, err; + + LIBBPF_PTR_ASSERT(event, goto errout); + __LIBBPF_ASSERT(fd = 
bpf_program__nth_fd(prog, 0), + >= 0, + goto errout); + + if (strncmp(event, "kprobe/", 7) == 0) + err = create_kprobes(fd, event + 7, true); + else if (strncmp(event, "kretprobe/", 10) == 0) + err = create_kprobes(fd, event + 10, false); + + if (err) { + fprintf(stderr, "failed to create kprobes\n"); + goto errout; + } + } + + return obj; +errout: + bpf_object__close(obj); + return NULL; +} + +int get_prog_fd(struct bpf_object *obj, int idx) +{ + int i = 0; + struct bpf_program *prog; + + bpf_object__for_each_program(prog, obj) + if (i++ == idx) + return bpf_program__nth_fd(prog, 0); + return -1; +} + +int get_map_fd(struct bpf_object *obj, int idx) +{ + int i = 0; + struct bpf_map *map; + + bpf_map__for_each(map, obj) + if (i++ == idx) + return bpf_map__get_fd(map); + return -1; +} diff --git a/samples/bpf/utils.h b/samples/bpf/utils.h new file mode 100644 index 0000000..5962a68 --- /dev/null +++ b/samples/bpf/utils.h @@ -0,0 +1,217 @@ +#ifndef __SAMPELS_UTILS_H +#define __SAMPELS_UTILS_H + +#include +#include + +/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ + +#define BPF_ALU64_REG(OP, DST, SRC) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + +#define BPF_ALU32_REG(OP, DST, SRC) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + +/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */ + +#define BPF_ALU64_IMM(OP, DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +#define BPF_ALU32_IMM(OP, DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +/* Short form of mov, dst_reg = src_reg */ + +#define BPF_MOV64_REG(DST, SRC) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | 
BPF_MOV | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + +#define BPF_MOV32_REG(DST, SRC) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_MOV | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + +/* Short form of mov, dst_reg = imm32 */ + +#define BPF_MOV64_IMM(DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_MOV | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */ +#define BPF_LD_IMM64(DST, IMM) \ + BPF_LD_IMM64_RAW(DST, 0, IMM) + +#define BPF_LD_IMM64_RAW(DST, SRC, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_LD | BPF_DW | BPF_IMM, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = (__u32) (IMM) }), \ + ((struct bpf_insn) { \ + .code = 0, /* zero is reserved opcode */ \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = ((__u64) (IMM)) >> 32 }) + +#ifndef BPF_PSEUDO_MAP_FD +# define BPF_PSEUDO_MAP_FD 1 +#endif + +/* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */ +#define BPF_LD_MAP_FD(DST, MAP_FD) \ + BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD) + + +/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */ + +#define BPF_LD_ABS(SIZE, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +/* Memory load, dst_reg = *(uint *) (src_reg + off16) */ + +#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Memory store, *(uint *) (dst_reg + off16) = src_reg */ + +#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Memory store, *(uint *) (dst_reg + off16) = imm32 */ + +#define 
BPF_ST_MEM(SIZE, DST, OFF, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + +/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */ + +#define BPF_JMP_REG(OP, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */ + +#define BPF_JMP_IMM(OP, DST, IMM, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + +/* Raw code statement block */ + +#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \ + ((struct bpf_insn) { \ + .code = CODE, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = IMM }) + +/* Program exit */ + +#define BPF_EXIT_INSN() \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_EXIT, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = 0 }) + +#define __LIBBPF_ASSERT(stat, cond, ret) do { \ + char ___errbuf[256]; \ + int ___err = stat; \ + \ + if ((___err) cond) \ + break; \ + libbpf_strerror(___err, ___errbuf, sizeof(___errbuf));\ + fprintf(stderr, "libbpf error: %s\n", ___errbuf);\ + ret; \ +} while(0) + +#define __LIBBPF_PTR_ASSERT(stat, cond, ret) do { \ + const void *___ptr = stat; \ + \ + if (!IS_ERR(___ptr) && ___ptr) \ + break; \ + if (!___ptr) \ + ___ptr = ERR_PTR(-EEXIST); \ + LIBBPF_ASSERT(PTR_ERR(___ptr), ret); \ +} while(0) + +#define LIBBPF_ASSERT(stat, ret) __LIBBPF_ASSERT(stat, == 0, ret) +#define LIBBPF_PTR_ASSERT(stat, ret) __LIBBPF_PTR_ASSERT(stat, == 0, ret) + +/* create RAW socket and bind to interface 'name' */ +int open_raw_sock(const char *name); +void read_trace_pipe(void); + +struct perf_event_attr; +int perf_event_open(struct perf_event_attr *attr, int pid, int cpu, + int group_fd, unsigned long flags); + +int 
prog_load_prepare(struct bpf_program *prog, int n, + struct bpf_insn *insns, int insns_cnt, + struct bpf_prog_prep_result *res); + +struct bpf_object *load_bpf_file(char *path); +int get_prog_fd(struct bpf_object *obj, int idx); +int get_map_fd(struct bpf_object *obj, int idx); +#endif -- 1.8.3.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/