This is a grab bag of changes to the bpf testing infrastructure I
developed working on MIPS eBPF JIT support. The change to
bpf_jit_disasm is probably universally beneficial, the others are more
MIPS-specific.
David Daney (4):
tools: bpf_jit_disasm: Handle large images.
test_bpf: Add test to make conditional jump cross a large number of
insns.
bpf: Add MIPS support to samples/bpf.
samples/bpf: Fix tracex5 to work with MIPS syscalls.
lib/test_bpf.c | 32 ++++++++++++++++++++++++++++++++
samples/bpf/Makefile | 13 +++++++++++++
samples/bpf/bpf_helpers.h | 13 +++++++++++++
samples/bpf/syscall_nrs.c | 12 ++++++++++++
samples/bpf/tracex5_kern.c | 11 ++++++++---
tools/net/bpf_jit_disasm.c | 37 ++++++++++++++++++++++++++-----------
6 files changed, 104 insertions(+), 14 deletions(-)
create mode 100644 samples/bpf/syscall_nrs.c
--
2.9.4
On MIPS, conditional branches can only span 32k instructions. To
exceed this limit in the JIT with the BPF maximum of 4k insns, we need
to choose eBPF insns that expand to more than 8 machine instructions.
Use BPF_LD_ABS as it is quite complex. This forces the JIT to invert
the sense of the branch to branch around a long jump to the end.
This (somewhat) verifies that the branch inversion logic and target
address calculation of the long jumps are done correctly.
Signed-off-by: David Daney <[email protected]>
---
lib/test_bpf.c | 32 ++++++++++++++++++++++++++++++++
1 file changed, 32 insertions(+)
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index be88cba..9ecbf47 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -434,6 +434,30 @@ static int bpf_fill_ld_abs_vlan_push_pop(struct bpf_test *self)
return 0;
}
+static int bpf_fill_jump_around_ld_abs(struct bpf_test *self)
+{
+ unsigned int len = BPF_MAXINSNS;
+ struct bpf_insn *insn;
+ int i = 0;
+
+ insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL);
+ if (!insn)
+ return -ENOMEM;
+
+ insn[i++] = BPF_MOV64_REG(R6, R1);
+ insn[i++] = BPF_LD_ABS(BPF_B, 0);
+ insn[i] = BPF_JMP_IMM(BPF_JEQ, R0, 10, len - i - 2);
+ i++;
+ while (i < len - 1)
+ insn[i++] = BPF_LD_ABS(BPF_B, 1);
+ insn[i] = BPF_EXIT_INSN();
+
+ self->u.ptr.insns = insn;
+ self->u.ptr.len = len;
+
+ return 0;
+}
+
static int __bpf_fill_stxdw(struct bpf_test *self, int size)
{
unsigned int len = BPF_MAXINSNS;
@@ -5022,6 +5046,14 @@ static struct bpf_test tests[] = {
{ { ETH_HLEN, 0xbef } },
.fill_helper = bpf_fill_ld_abs_vlan_push_pop,
},
+ {
+ "BPF_MAXINSNS: jump around ld_abs",
+ { },
+ INTERNAL,
+ { 10, 11 },
+ { { 2, 10 } },
+ .fill_helper = bpf_fill_jump_around_ld_abs,
+ },
/*
* LD_IND / LD_ABS on fragmented SKBs
*/
--
2.9.4
Signed-off-by: David Daney <[email protected]>
---
samples/bpf/bpf_helpers.h | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index 9a9c95f..76526da 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -135,6 +135,19 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) =
#define PT_REGS_SP(x) ((x)->sp)
#define PT_REGS_IP(x) ((x)->pc)
+#elif defined(__mips__)
+
+#define PT_REGS_PARM1(x) ((x)->regs[4])
+#define PT_REGS_PARM2(x) ((x)->regs[5])
+#define PT_REGS_PARM3(x) ((x)->regs[6])
+#define PT_REGS_PARM4(x) ((x)->regs[7])
+#define PT_REGS_PARM5(x) ((x)->regs[8])
+#define PT_REGS_RET(x) ((x)->regs[31])
+#define PT_REGS_FP(x) ((x)->regs[30]) /* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_RC(x) ((x)->regs[1])
+#define PT_REGS_SP(x) ((x)->regs[29])
+#define PT_REGS_IP(x) ((x)->cp0_epc)
+
#elif defined(__powerpc__)
#define PT_REGS_PARM1(x) ((x)->gpr[3])
--
2.9.4
There are two problems:
1) On MIPS, the __NR_* macros expand to an expression; this causes the
sections of the object file to be named like:
.
.
.
[ 5] kprobe/(5000 + 1) PROGBITS 0000000000000000 000160 ...
[ 6] kprobe/(5000 + 0) PROGBITS 0000000000000000 000258 ...
[ 7] kprobe/(5000 + 9) PROGBITS 0000000000000000 000348 ...
.
.
.
The fix here is to use the "asm_offsets" trick to evaluate the macros
in the C compiler and generate a header file with a usable form of the
macros.
2) MIPS syscall numbers start at 5000, so we need a bigger map to hold
the sub-programs.
Signed-off-by: David Daney <[email protected]>
---
samples/bpf/Makefile | 13 +++++++++++++
samples/bpf/syscall_nrs.c | 12 ++++++++++++
samples/bpf/tracex5_kern.c | 11 ++++++++---
3 files changed, 33 insertions(+), 3 deletions(-)
create mode 100644 samples/bpf/syscall_nrs.c
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 6c7468e..a0561dc 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -160,6 +160,17 @@ clean:
$(MAKE) -C ../../ M=$(CURDIR) clean
@rm -f *~
+$(obj)/syscall_nrs.s: $(src)/syscall_nrs.c
+ $(call if_changed_dep,cc_s_c)
+
+$(obj)/syscall_nrs.h: $(obj)/syscall_nrs.s FORCE
+ $(call filechk,offsets,__SYSCALL_NRS_H__)
+
+clean-files += syscall_nrs.h
+
+FORCE:
+
+
# Verify LLVM compiler tools are available and bpf target is supported by llc
.PHONY: verify_cmds verify_target_bpf $(CLANG) $(LLC)
@@ -180,6 +191,8 @@ verify_target_bpf: verify_cmds
$(src)/*.c: verify_target_bpf
+$(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h
+
# asm/sysreg.h - inline assembly used by it is incompatible with llvm.
# But, there is no easy way to fix it, so just exclude it since it is
# useless for BPF samples.
diff --git a/samples/bpf/syscall_nrs.c b/samples/bpf/syscall_nrs.c
new file mode 100644
index 0000000..ce2a30b
--- /dev/null
+++ b/samples/bpf/syscall_nrs.c
@@ -0,0 +1,12 @@
+#include <uapi/linux/unistd.h>
+#include <linux/kbuild.h>
+
+#define SYSNR(_NR) DEFINE(SYS ## _NR, _NR)
+
+void syscall_defines(void)
+{
+ COMMENT("Linux system call numbers.");
+ SYSNR(__NR_write);
+ SYSNR(__NR_read);
+ SYSNR(__NR_mmap);
+}
diff --git a/samples/bpf/tracex5_kern.c b/samples/bpf/tracex5_kern.c
index 7e4cf74..f57f4e1 100644
--- a/samples/bpf/tracex5_kern.c
+++ b/samples/bpf/tracex5_kern.c
@@ -9,6 +9,7 @@
#include <uapi/linux/bpf.h>
#include <uapi/linux/seccomp.h>
#include <uapi/linux/unistd.h>
+#include "syscall_nrs.h"
#include "bpf_helpers.h"
#define PROG(F) SEC("kprobe/"__stringify(F)) int bpf_func_##F
@@ -17,7 +18,11 @@ struct bpf_map_def SEC("maps") progs = {
.type = BPF_MAP_TYPE_PROG_ARRAY,
.key_size = sizeof(u32),
.value_size = sizeof(u32),
+#ifdef __mips__
+ .max_entries = 6000, /* MIPS n64 syscalls start at 5000 */
+#else
.max_entries = 1024,
+#endif
};
SEC("kprobe/__seccomp_filter")
@@ -37,7 +42,7 @@ int bpf_prog1(struct pt_regs *ctx)
}
/* we jump here when syscall number == __NR_write */
-PROG(__NR_write)(struct pt_regs *ctx)
+PROG(SYS__NR_write)(struct pt_regs *ctx)
{
struct seccomp_data sd;
@@ -50,7 +55,7 @@ PROG(__NR_write)(struct pt_regs *ctx)
return 0;
}
-PROG(__NR_read)(struct pt_regs *ctx)
+PROG(SYS__NR_read)(struct pt_regs *ctx)
{
struct seccomp_data sd;
@@ -63,7 +68,7 @@ PROG(__NR_read)(struct pt_regs *ctx)
return 0;
}
-PROG(__NR_mmap)(struct pt_regs *ctx)
+PROG(SYS__NR_mmap)(struct pt_regs *ctx)
{
char fmt[] = "mmap\n";
bpf_trace_printk(fmt, sizeof(fmt));
--
2.9.4
Dynamically allocate memory so that JIT images larger than the size of
the statically allocated array can be handled.
Signed-off-by: David Daney <[email protected]>
---
tools/net/bpf_jit_disasm.c | 37 ++++++++++++++++++++++++++-----------
1 file changed, 26 insertions(+), 11 deletions(-)
diff --git a/tools/net/bpf_jit_disasm.c b/tools/net/bpf_jit_disasm.c
index ad572e6..422d9abd 100644
--- a/tools/net/bpf_jit_disasm.c
+++ b/tools/net/bpf_jit_disasm.c
@@ -159,8 +159,8 @@ static void put_log_buff(char *buff)
free(buff);
}
-static unsigned int get_last_jit_image(char *haystack, size_t hlen,
- uint8_t *image, size_t ilen)
+static uint8_t *get_last_jit_image(char *haystack, size_t hlen,
+ unsigned int *ilen)
{
char *ptr, *pptr, *tmp;
off_t off = 0;
@@ -168,9 +168,10 @@ static unsigned int get_last_jit_image(char *haystack, size_t hlen,
regmatch_t pmatch[1];
unsigned long base;
regex_t regex;
+ uint8_t *image;
if (hlen == 0)
- return 0;
+ return NULL;
ret = regcomp(®ex, "flen=[[:alnum:]]+ proglen=[[:digit:]]+ "
"pass=[[:digit:]]+ image=[[:xdigit:]]+", REG_EXTENDED);
@@ -194,11 +195,22 @@ static unsigned int get_last_jit_image(char *haystack, size_t hlen,
&flen, &proglen, &pass, &base);
if (ret != 4) {
regfree(®ex);
- return 0;
+ return NULL;
+ }
+ if (proglen > 1000000) {
+ printf("proglen of %d too big, stopping\n", proglen);
+ return NULL;
}
+ image = malloc(proglen);
+ if (!image) {
+ printf("Out of memory\n");
+ return NULL;
+ }
+ memset(image, 0, proglen);
+
tmp = ptr = haystack + off;
- while ((ptr = strtok(tmp, "\n")) != NULL && ulen < ilen) {
+ while ((ptr = strtok(tmp, "\n")) != NULL && ulen < proglen) {
tmp = NULL;
if (!strstr(ptr, "JIT code"))
continue;
@@ -208,10 +220,12 @@ static unsigned int get_last_jit_image(char *haystack, size_t hlen,
ptr = pptr;
do {
image[ulen++] = (uint8_t) strtoul(pptr, &pptr, 16);
- if (ptr == pptr || ulen >= ilen) {
+ if (ptr == pptr) {
ulen--;
break;
}
+ if (ulen >= proglen)
+ break;
ptr = pptr;
} while (1);
}
@@ -222,7 +236,8 @@ static unsigned int get_last_jit_image(char *haystack, size_t hlen,
printf("%lx + <x>:\n", base);
regfree(®ex);
- return ulen;
+ *ilen = ulen;
+ return image;
}
static void usage(void)
@@ -237,12 +252,12 @@ static void usage(void)
int main(int argc, char **argv)
{
unsigned int len, klen, opt, opcodes = 0;
- static uint8_t image[32768];
char *kbuff, *file = NULL;
char *ofile = NULL;
int ofd;
ssize_t nr;
uint8_t *pos;
+ uint8_t *image = NULL;
while ((opt = getopt(argc, argv, "of:O:")) != -1) {
switch (opt) {
@@ -262,7 +277,6 @@ int main(int argc, char **argv)
}
bfd_init();
- memset(image, 0, sizeof(image));
kbuff = get_log_buff(file, &klen);
if (!kbuff) {
@@ -270,8 +284,8 @@ int main(int argc, char **argv)
return -1;
}
- len = get_last_jit_image(kbuff, klen, image, sizeof(image));
- if (len <= 0) {
+ image = get_last_jit_image(kbuff, klen, &len);
+ if (!image) {
fprintf(stderr, "No JIT image found!\n");
goto done;
}
@@ -301,5 +315,6 @@ int main(int argc, char **argv)
done:
put_log_buff(kbuff);
+ free(image);
return 0;
}
--
2.9.4
On 06/14/2017 01:49 AM, David Daney wrote:
> Dynamically allocate memory so that JIT images larger than the size of
> the statically allocated array can be handled.
>
> Signed-off-by: David Daney <[email protected]>
Acked-by: Daniel Borkmann <[email protected]>
On 06/14/2017 01:49 AM, David Daney wrote:
> On MIPS, conditional branches can only span 32k instructions. To
> exceed this limit in the JIT with the BPF maximum of 4k insns, we need
> to choose eBPF insns that expand to more than 8 machine instructions.
> Use BPF_LD_ABS as it is quite complex. This forces the JIT to invert
> the sense of the branch to branch around a long jump to the end.
>
> This (somewhat) verifies that the branch inversion logic and target
> address calculation of the long jumps are done correctly.
>
> Signed-off-by: David Daney <[email protected]>
Acked-by: Daniel Borkmann <[email protected]>
On 06/14/2017 01:49 AM, David Daney wrote:
> Signed-off-by: David Daney <[email protected]>
Acked-by: Daniel Borkmann <[email protected]>
On 06/14/2017 01:49 AM, David Daney wrote:
> There are two problems:
>
> 1) In MIPS the __NR_* macros expand to an expression, this causes the
> sections of the object file to be named like:
>
> .
> .
> .
> [ 5] kprobe/(5000 + 1) PROGBITS 0000000000000000 000160 ...
> [ 6] kprobe/(5000 + 0) PROGBITS 0000000000000000 000258 ...
> [ 7] kprobe/(5000 + 9) PROGBITS 0000000000000000 000348 ...
> .
> .
> .
>
> The fix here is to use the "asm_offsets" trick to evaluate the macros
> in the C compiler and generate a header file with a usable form of the
> macros.
>
> 2) MIPS syscall numbers start at 5000, so we need a bigger map to hold
> the sub-programs.
>
> Signed-off-by: David Daney <[email protected]>
Acked-by: Daniel Borkmann <[email protected]>
On 06/14/2017 01:49 AM, David Daney wrote:
> This is a grab bag of changes to the bpf testing infrastructure I
> developed working on MIPS eBPF JIT support. The change to
> bpf_jit_disasm is probably universally beneficial, the others are more
> MIPS specific.
I think these could go independently through net-next tree?
Thanks,
Daniel
> David Daney (4):
> tools: bpf_jit_disasm: Handle large images.
> test_bpf: Add test to make conditional jump cross a large number of
> insns.
> bpf: Add MIPS support to samples/bpf.
> samples/bpf: Fix tracex5 to work with MIPS syscalls.
>
> lib/test_bpf.c | 32 ++++++++++++++++++++++++++++++++
> samples/bpf/Makefile | 13 +++++++++++++
> samples/bpf/bpf_helpers.h | 13 +++++++++++++
> samples/bpf/syscall_nrs.c | 12 ++++++++++++
> samples/bpf/tracex5_kern.c | 11 ++++++++---
> tools/net/bpf_jit_disasm.c | 37 ++++++++++++++++++++++++++-----------
> 6 files changed, 104 insertions(+), 14 deletions(-)
> create mode 100644 samples/bpf/syscall_nrs.c
>
On 06/13/2017 05:22 PM, Daniel Borkmann wrote:
> On 06/14/2017 01:49 AM, David Daney wrote:
>> This is a grab bag of changes to the bpf testing infrastructure I
>> developed working on MIPS eBPF JIT support. The change to
>> bpf_jit_disasm is probably universally beneficial, the others are more
>> MIPS specific.
>
> I think these could go independently through net-next tree?
Yes, if davem is happy with them, I think that makes sense that he take
them via net-next.
David Daney
>
> Thanks,
> Daniel
>
>> David Daney (4):
>> tools: bpf_jit_disasm: Handle large images.
>> test_bpf: Add test to make conditional jump cross a large number of
>> insns.
>> bpf: Add MIPS support to samples/bpf.
>> samples/bpf: Fix tracex5 to work with MIPS syscalls.
>>
>> lib/test_bpf.c | 32 ++++++++++++++++++++++++++++++++
>> samples/bpf/Makefile | 13 +++++++++++++
>> samples/bpf/bpf_helpers.h | 13 +++++++++++++
>> samples/bpf/syscall_nrs.c | 12 ++++++++++++
>> samples/bpf/tracex5_kern.c | 11 ++++++++---
>> tools/net/bpf_jit_disasm.c | 37 ++++++++++++++++++++++++++-----------
>> 6 files changed, 104 insertions(+), 14 deletions(-)
>> create mode 100644 samples/bpf/syscall_nrs.c
>>
>
From: David Daney <[email protected]>
Date: Wed, 14 Jun 2017 08:54:03 -0700
> On 06/13/2017 05:22 PM, Daniel Borkmann wrote:
>> On 06/14/2017 01:49 AM, David Daney wrote:
>>> This is a grab bag of changes to the bpf testing infrastructure I
>>> developed working on MIPS eBPF JIT support. The change to
>>> bpf_jit_disasm is probably universally beneficial, the others are more
>>> MIPS specific.
>> I think these could go independently through net-next tree?
>
> Yes, if davem is happy with them, I think that makes sense that he
> take them via net-next.
Series applied to net-next, thanks!