2014-07-25 08:05:15

by Alexei Starovoitov

[permalink] [raw]
Subject: [PATCH net-next] net: filter: rename 'struct sk_filter' to 'struct bpf_prog'

'sk_filter' name is used as 'struct sk_filter', function sk_filter() and
as variable 'sk_filter', which makes code hard to read.
Also it's easily confused with 'struct sock_filter'
Rename 'struct sk_filter' to 'struct bpf_prog' to clarify semantics and
align the name with generic BPF use model.

The only ugly place is uapi/linux/netfilter/xt_bpf.h which
managed to expose kernel internal structure into uapi header.
Though it shouldn't even compile in user space, preserve the mess by
adding empty 'struct sk_filter;' there and type cast it to 'struct bpf_prog'
inside kernel in net/netfilter/xt_bpf.c

Signed-off-by: Alexei Starovoitov <[email protected]>
---

alternative fix for xt_bpf.h could be to replace:
/* only used in the kernel */
struct sk_filter *filter __attribute__((aligned(8)));
with
/* only used in the kernel */
void *filter __attribute__((aligned(8)));

but this 'void *' approach may further break broken userspace,
whereas the fix implemented here is more seamless.

Tested on x64, arm, sparc

Documentation/networking/filter.txt | 2 +-
arch/arm/net/bpf_jit_32.c | 8 +++---
arch/mips/net/bpf_jit.c | 8 +++---
arch/powerpc/net/bpf_jit_comp.c | 8 +++---
arch/s390/net/bpf_jit_comp.c | 4 +--
arch/sparc/net/bpf_jit_comp.c | 4 +--
arch/x86/net/bpf_jit_comp.c | 10 +++----
drivers/net/ppp/ppp_generic.c | 4 +--
drivers/net/team/team_mode_loadbalance.c | 8 +++---
include/linux/filter.h | 28 +++++++++----------
include/linux/isdn_ppp.h | 4 +--
include/net/sock.h | 2 +-
include/uapi/linux/netfilter/xt_bpf.h | 2 ++
kernel/bpf/core.c | 6 ++---
kernel/seccomp.c | 2 +-
lib/test_bpf.c | 12 ++++-----
net/core/filter.c | 43 +++++++++++++++---------------
net/core/ptp_classifier.c | 2 +-
net/core/sock.c | 4 +--
net/core/sock_diag.c | 2 +-
net/netfilter/xt_bpf.c | 7 ++---
net/packet/af_packet.c | 2 +-
net/sched/cls_bpf.c | 4 +--
23 files changed, 89 insertions(+), 87 deletions(-)

diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
index ee78eba78a9d..138b645d5682 100644
--- a/Documentation/networking/filter.txt
+++ b/Documentation/networking/filter.txt
@@ -589,7 +589,7 @@ in the eBPF interpreter. For in-kernel handlers, this all works transparently
by using sk_unattached_filter_create() for setting up the filter, resp.
sk_unattached_filter_destroy() for destroying it. The macro
SK_RUN_FILTER(filter, ctx) transparently invokes eBPF interpreter or JITed
-code to run the filter. 'filter' is a pointer to struct sk_filter that we
+code to run the filter. 'filter' is a pointer to struct bpf_prog that we
got from sk_unattached_filter_create(), and 'ctx' the given context (e.g.
skb pointer). All constraints and restrictions from sk_chk_filter() apply
before a conversion to the new layout is being done behind the scenes!
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index fb5503ce016f..a37b989a2f91 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -56,7 +56,7 @@
#define FLAG_NEED_X_RESET (1 << 0)

struct jit_ctx {
- const struct sk_filter *skf;
+ const struct bpf_prog *skf;
unsigned idx;
unsigned prologue_bytes;
int ret0_fp_idx;
@@ -465,7 +465,7 @@ static inline void update_on_xread(struct jit_ctx *ctx)
static int build_body(struct jit_ctx *ctx)
{
void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w};
- const struct sk_filter *prog = ctx->skf;
+ const struct bpf_prog *prog = ctx->skf;
const struct sock_filter *inst;
unsigned i, load_order, off, condt;
int imm12;
@@ -857,7 +857,7 @@ b_epilogue:
}


-void bpf_jit_compile(struct sk_filter *fp)
+void bpf_jit_compile(struct bpf_prog *fp)
{
struct jit_ctx ctx;
unsigned tmp_idx;
@@ -926,7 +926,7 @@ out:
return;
}

-void bpf_jit_free(struct sk_filter *fp)
+void bpf_jit_free(struct bpf_prog *fp)
{
if (fp->jited)
module_free(NULL, fp->bpf_func);
diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index b87390a56a2f..05a56619ece2 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -131,7 +131,7 @@
* @target: Memory location for the compiled filter
*/
struct jit_ctx {
- const struct sk_filter *skf;
+ const struct bpf_prog *skf;
unsigned int prologue_bytes;
u32 idx;
u32 flags;
@@ -789,7 +789,7 @@ static int pkt_type_offset(void)
static int build_body(struct jit_ctx *ctx)
{
void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w};
- const struct sk_filter *prog = ctx->skf;
+ const struct bpf_prog *prog = ctx->skf;
const struct sock_filter *inst;
unsigned int i, off, load_order, condt;
u32 k, b_off __maybe_unused;
@@ -1369,7 +1369,7 @@ jmp_cmp:

int bpf_jit_enable __read_mostly;

-void bpf_jit_compile(struct sk_filter *fp)
+void bpf_jit_compile(struct bpf_prog *fp)
{
struct jit_ctx ctx;
unsigned int alloc_size, tmp_idx;
@@ -1423,7 +1423,7 @@ out:
kfree(ctx.offsets);
}

-void bpf_jit_free(struct sk_filter *fp)
+void bpf_jit_free(struct bpf_prog *fp)
{
if (fp->jited)
module_free(NULL, fp->bpf_func);
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 82e82cadcde5..3afa6f4c1957 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -25,7 +25,7 @@ static inline void bpf_flush_icache(void *start, void *end)
flush_icache_range((unsigned long)start, (unsigned long)end);
}

-static void bpf_jit_build_prologue(struct sk_filter *fp, u32 *image,
+static void bpf_jit_build_prologue(struct bpf_prog *fp, u32 *image,
struct codegen_context *ctx)
{
int i;
@@ -121,7 +121,7 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)

/* Assemble the body code between the prologue & epilogue. */
-static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
+static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
struct codegen_context *ctx,
unsigned int *addrs)
{
@@ -569,7 +569,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
return 0;
}

-void bpf_jit_compile(struct sk_filter *fp)
+void bpf_jit_compile(struct bpf_prog *fp)
{
unsigned int proglen;
unsigned int alloclen;
@@ -693,7 +693,7 @@ out:
return;
}

-void bpf_jit_free(struct sk_filter *fp)
+void bpf_jit_free(struct bpf_prog *fp)
{
if (fp->jited)
module_free(NULL, fp->bpf_func);
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index a2cbd875543a..61e45b7c04d7 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -812,7 +812,7 @@ static struct bpf_binary_header *bpf_alloc_binary(unsigned int bpfsize,
return header;
}

-void bpf_jit_compile(struct sk_filter *fp)
+void bpf_jit_compile(struct bpf_prog *fp)
{
struct bpf_binary_header *header = NULL;
unsigned long size, prg_len, lit_len;
@@ -875,7 +875,7 @@ out:
kfree(addrs);
}

-void bpf_jit_free(struct sk_filter *fp)
+void bpf_jit_free(struct bpf_prog *fp)
{
unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
struct bpf_binary_header *header = (void *)addr;
diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c
index 892a102671ad..1f76c22a6a75 100644
--- a/arch/sparc/net/bpf_jit_comp.c
+++ b/arch/sparc/net/bpf_jit_comp.c
@@ -354,7 +354,7 @@ do { *prog++ = BR_OPC | WDISP22(OFF); \
* emit_jump() calls with adjusted offsets.
*/

-void bpf_jit_compile(struct sk_filter *fp)
+void bpf_jit_compile(struct bpf_prog *fp)
{
unsigned int cleanup_addr, proglen, oldproglen = 0;
u32 temp[8], *prog, *func, seen = 0, pass;
@@ -808,7 +808,7 @@ out:
return;
}

-void bpf_jit_free(struct sk_filter *fp)
+void bpf_jit_free(struct bpf_prog *fp)
{
if (fp->jited)
module_free(NULL, fp->bpf_func);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 71737a83f022..ad8701da196c 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -211,7 +211,7 @@ struct jit_context {
bool seen_ld_abs;
};

-static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image,
+static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
int oldproglen, struct jit_context *ctx)
{
struct bpf_insn *insn = bpf_prog->insnsi;
@@ -862,11 +862,11 @@ common_load: ctx->seen_ld_abs = true;
return proglen;
}

-void bpf_jit_compile(struct sk_filter *prog)
+void bpf_jit_compile(struct bpf_prog *prog)
{
}

-void bpf_int_jit_compile(struct sk_filter *prog)
+void bpf_int_jit_compile(struct bpf_prog *prog)
{
struct bpf_binary_header *header = NULL;
int proglen, oldproglen = 0;
@@ -932,7 +932,7 @@ out:

static void bpf_jit_free_deferred(struct work_struct *work)
{
- struct sk_filter *fp = container_of(work, struct sk_filter, work);
+ struct bpf_prog *fp = container_of(work, struct bpf_prog, work);
unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
struct bpf_binary_header *header = (void *)addr;

@@ -941,7 +941,7 @@ static void bpf_jit_free_deferred(struct work_struct *work)
kfree(fp);
}

-void bpf_jit_free(struct sk_filter *fp)
+void bpf_jit_free(struct bpf_prog *fp)
{
if (fp->jited) {
INIT_WORK(&fp->work, bpf_jit_free_deferred);
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 765248b42a0a..4610c7fe284a 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -143,8 +143,8 @@ struct ppp {
struct sk_buff_head mrq; /* MP: receive reconstruction queue */
#endif /* CONFIG_PPP_MULTILINK */
#ifdef CONFIG_PPP_FILTER
- struct sk_filter *pass_filter; /* filter for packets to pass */
- struct sk_filter *active_filter;/* filter for pkts to reset idle */
+ struct bpf_prog *pass_filter; /* filter for packets to pass */
+ struct bpf_prog *active_filter; /* filter for pkts to reset idle */
#endif /* CONFIG_PPP_FILTER */
struct net *ppp_net; /* the net we belong to */
struct ppp_link_stats stats64; /* 64 bit network stats */
diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c
index a58dfebb5512..649c8f2daa11 100644
--- a/drivers/net/team/team_mode_loadbalance.c
+++ b/drivers/net/team/team_mode_loadbalance.c
@@ -58,7 +58,7 @@ struct lb_priv_ex {
};

struct lb_priv {
- struct sk_filter __rcu *fp;
+ struct bpf_prog __rcu *fp;
lb_select_tx_port_func_t __rcu *select_tx_port_func;
struct lb_pcpu_stats __percpu *pcpu_stats;
struct lb_priv_ex *ex; /* priv extension */
@@ -174,7 +174,7 @@ static lb_select_tx_port_func_t *lb_select_tx_port_get_func(const char *name)
static unsigned int lb_get_skb_hash(struct lb_priv *lb_priv,
struct sk_buff *skb)
{
- struct sk_filter *fp;
+ struct bpf_prog *fp;
uint32_t lhash;
unsigned char *c;

@@ -271,8 +271,8 @@ static void __fprog_destroy(struct sock_fprog_kern *fprog)
static int lb_bpf_func_set(struct team *team, struct team_gsetter_ctx *ctx)
{
struct lb_priv *lb_priv = get_lb_priv(team);
- struct sk_filter *fp = NULL;
- struct sk_filter *orig_fp;
+ struct bpf_prog *fp = NULL;
+ struct bpf_prog *orig_fp;
struct sock_fprog_kern *fprog = NULL;
int err;

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 20dd50ef7271..2f4c1b10e99d 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -323,7 +323,7 @@ struct sk_buff;
struct sock;
struct seccomp_data;

-struct sk_filter {
+struct bpf_prog {
atomic_t refcnt;
u32 jited:1, /* Is our filter JIT'ed? */
len:31; /* Number of filter blocks */
@@ -340,8 +340,8 @@ struct sk_filter {

static inline unsigned int sk_filter_size(unsigned int proglen)
{
- return max(sizeof(struct sk_filter),
- offsetof(struct sk_filter, insns[proglen]));
+ return max(sizeof(struct bpf_prog),
+ offsetof(struct bpf_prog, insns[proglen]));
}

#define sk_filter_proglen(fprog) \
@@ -349,15 +349,15 @@ static inline unsigned int sk_filter_size(unsigned int proglen)

int sk_filter(struct sock *sk, struct sk_buff *skb);

-void sk_filter_select_runtime(struct sk_filter *fp);
-void sk_filter_free(struct sk_filter *fp);
+void sk_filter_select_runtime(struct bpf_prog *fp);
+void sk_filter_free(struct bpf_prog *fp);

int sk_convert_filter(struct sock_filter *prog, int len,
struct bpf_insn *new_prog, int *new_len);

-int sk_unattached_filter_create(struct sk_filter **pfp,
+int sk_unattached_filter_create(struct bpf_prog **pfp,
struct sock_fprog_kern *fprog);
-void sk_unattached_filter_destroy(struct sk_filter *fp);
+void sk_unattached_filter_destroy(struct bpf_prog *fp);

int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
int sk_detach_filter(struct sock *sk);
@@ -366,11 +366,11 @@ int sk_chk_filter(const struct sock_filter *filter, unsigned int flen);
int sk_get_filter(struct sock *sk, struct sock_filter __user *filter,
unsigned int len);

-void sk_filter_charge(struct sock *sk, struct sk_filter *fp);
-void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);
+void sk_filter_charge(struct sock *sk, struct bpf_prog *fp);
+void sk_filter_uncharge(struct sock *sk, struct bpf_prog *fp);

u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
-void bpf_int_jit_compile(struct sk_filter *fp);
+void bpf_int_jit_compile(struct bpf_prog *fp);

#define BPF_ANC BIT(15)

@@ -424,8 +424,8 @@ static inline void *bpf_load_pointer(const struct sk_buff *skb, int k,
#include <linux/linkage.h>
#include <linux/printk.h>

-void bpf_jit_compile(struct sk_filter *fp);
-void bpf_jit_free(struct sk_filter *fp);
+void bpf_jit_compile(struct bpf_prog *fp);
+void bpf_jit_free(struct bpf_prog *fp);

static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen,
u32 pass, void *image)
@@ -439,11 +439,11 @@ static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen,
#else
#include <linux/slab.h>

-static inline void bpf_jit_compile(struct sk_filter *fp)
+static inline void bpf_jit_compile(struct bpf_prog *fp)
{
}

-static inline void bpf_jit_free(struct sk_filter *fp)
+static inline void bpf_jit_free(struct bpf_prog *fp)
{
kfree(fp);
}
diff --git a/include/linux/isdn_ppp.h b/include/linux/isdn_ppp.h
index 8e10f57f109f..a0070c6dfaf8 100644
--- a/include/linux/isdn_ppp.h
+++ b/include/linux/isdn_ppp.h
@@ -180,8 +180,8 @@ struct ippp_struct {
struct slcompress *slcomp;
#endif
#ifdef CONFIG_IPPP_FILTER
- struct sk_filter *pass_filter; /* filter for packets to pass */
- struct sk_filter *active_filter; /* filter for pkts to reset idle */
+ struct bpf_prog *pass_filter; /* filter for packets to pass */
+ struct bpf_prog *active_filter; /* filter for pkts to reset idle */
#endif
unsigned long debug;
struct isdn_ppp_compressor *compressor,*decompressor;
diff --git a/include/net/sock.h b/include/net/sock.h
index 720773304a85..ec21bd1b37e1 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -356,7 +356,7 @@ struct sock {
atomic_t sk_drops;
int sk_rcvbuf;

- struct sk_filter __rcu *sk_filter;
+ struct bpf_prog __rcu *sk_filter;
struct socket_wq __rcu *sk_wq;

#ifdef CONFIG_NET_DMA
diff --git a/include/uapi/linux/netfilter/xt_bpf.h b/include/uapi/linux/netfilter/xt_bpf.h
index 5dda450eb55b..2ec9fbcd06f9 100644
--- a/include/uapi/linux/netfilter/xt_bpf.h
+++ b/include/uapi/linux/netfilter/xt_bpf.h
@@ -6,6 +6,8 @@

#define XT_BPF_MAX_NUM_INSTR 64

+struct sk_filter;
+
struct xt_bpf_info {
__u16 bpf_program_num_elem;
struct sock_filter bpf_program[XT_BPF_MAX_NUM_INSTR];
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 265a02cc822d..e29000adfee2 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -508,7 +508,7 @@ load_byte:
return 0;
}

-void __weak bpf_int_jit_compile(struct sk_filter *prog)
+void __weak bpf_int_jit_compile(struct bpf_prog *prog)
{
}

@@ -519,7 +519,7 @@ void __weak bpf_int_jit_compile(struct sk_filter *prog)
* try to JIT internal BPF program, if JIT is not available select interpreter
* BPF program will be executed via SK_RUN_FILTER() macro
*/
-void sk_filter_select_runtime(struct sk_filter *fp)
+void sk_filter_select_runtime(struct bpf_prog *fp)
{
fp->bpf_func = (void *) __sk_run_filter;

@@ -529,7 +529,7 @@ void sk_filter_select_runtime(struct sk_filter *fp)
EXPORT_SYMBOL_GPL(sk_filter_select_runtime);

/* free internal BPF program */
-void sk_filter_free(struct sk_filter *fp)
+void sk_filter_free(struct bpf_prog *fp)
{
bpf_jit_free(fp);
}
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 565743db5384..5026b9cef786 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -54,7 +54,7 @@
struct seccomp_filter {
atomic_t usage;
struct seccomp_filter *prev;
- struct sk_filter *prog;
+ struct bpf_prog *prog;
};

/* Limit any path through the tree to 256KB worth of instructions. */
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 5f48623ee1a7..00e7d8ec0cee 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -1761,9 +1761,9 @@ static int probe_filter_length(struct sock_filter *fp)
return len + 1;
}

-static struct sk_filter *generate_filter(int which, int *err)
+static struct bpf_prog *generate_filter(int which, int *err)
{
- struct sk_filter *fp;
+ struct bpf_prog *fp;
struct sock_fprog_kern fprog;
unsigned int flen = probe_filter_length(tests[which].u.insns);
__u8 test_type = tests[which].aux & TEST_TYPE_MASK;
@@ -1817,7 +1817,7 @@ static struct sk_filter *generate_filter(int which, int *err)
return fp;
}

-static void release_filter(struct sk_filter *fp, int which)
+static void release_filter(struct bpf_prog *fp, int which)
{
__u8 test_type = tests[which].aux & TEST_TYPE_MASK;

@@ -1831,7 +1831,7 @@ static void release_filter(struct sk_filter *fp, int which)
}
}

-static int __run_one(const struct sk_filter *fp, const void *data,
+static int __run_one(const struct bpf_prog *fp, const void *data,
int runs, u64 *duration)
{
u64 start, finish;
@@ -1850,7 +1850,7 @@ static int __run_one(const struct sk_filter *fp, const void *data,
return ret;
}

-static int run_one(const struct sk_filter *fp, struct bpf_test *test)
+static int run_one(const struct bpf_prog *fp, struct bpf_test *test)
{
int err_cnt = 0, i, runs = MAX_TESTRUNS;

@@ -1884,7 +1884,7 @@ static __init int test_bpf(void)
int i, err_cnt = 0, pass_cnt = 0;

for (i = 0; i < ARRAY_SIZE(tests); i++) {
- struct sk_filter *fp;
+ struct bpf_prog *fp;
int err;

pr_info("#%d %s ", i, tests[i].descr);
diff --git a/net/core/filter.c b/net/core/filter.c
index f3b2d5e9fe5f..5b1ebffc6c19 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -60,7 +60,7 @@
int sk_filter(struct sock *sk, struct sk_buff *skb)
{
int err;
- struct sk_filter *filter;
+ struct bpf_prog *filter;

/*
* If the skb was allocated from pfmemalloc reserves, only
@@ -810,7 +810,7 @@ int sk_chk_filter(const struct sock_filter *filter, unsigned int flen)
}
EXPORT_SYMBOL(sk_chk_filter);

-static int sk_store_orig_filter(struct sk_filter *fp,
+static int sk_store_orig_filter(struct bpf_prog *fp,
const struct sock_fprog *fprog)
{
unsigned int fsize = sk_filter_proglen(fprog);
@@ -831,7 +831,7 @@ static int sk_store_orig_filter(struct sk_filter *fp,
return 0;
}

-static void sk_release_orig_filter(struct sk_filter *fp)
+static void sk_release_orig_filter(struct bpf_prog *fp)
{
struct sock_fprog_kern *fprog = fp->orig_prog;

@@ -847,7 +847,7 @@ static void sk_release_orig_filter(struct sk_filter *fp)
*/
static void sk_filter_release_rcu(struct rcu_head *rcu)
{
- struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
+ struct bpf_prog *fp = container_of(rcu, struct bpf_prog, rcu);

sk_release_orig_filter(fp);
sk_filter_free(fp);
@@ -859,29 +859,28 @@ static void sk_filter_release_rcu(struct rcu_head *rcu)
*
* Remove a filter from a socket and release its resources.
*/
-static void sk_filter_release(struct sk_filter *fp)
+static void sk_filter_release(struct bpf_prog *fp)
{
if (atomic_dec_and_test(&fp->refcnt))
call_rcu(&fp->rcu, sk_filter_release_rcu);
}

-void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
+void sk_filter_uncharge(struct sock *sk, struct bpf_prog *fp)
{
atomic_sub(sk_filter_size(fp->len), &sk->sk_omem_alloc);
sk_filter_release(fp);
}

-void sk_filter_charge(struct sock *sk, struct sk_filter *fp)
+void sk_filter_charge(struct sock *sk, struct bpf_prog *fp)
{
atomic_inc(&fp->refcnt);
atomic_add(sk_filter_size(fp->len), &sk->sk_omem_alloc);
}

-static struct sk_filter *__sk_migrate_realloc(struct sk_filter *fp,
- struct sock *sk,
- unsigned int len)
+static struct bpf_prog *__sk_migrate_realloc(struct bpf_prog *fp,
+ struct sock *sk, unsigned int len)
{
- struct sk_filter *fp_new;
+ struct bpf_prog *fp_new;

if (sk == NULL)
return krealloc(fp, len, GFP_KERNEL);
@@ -900,11 +899,11 @@ static struct sk_filter *__sk_migrate_realloc(struct sk_filter *fp,
return fp_new;
}

-static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
- struct sock *sk)
+static struct bpf_prog *__sk_migrate_filter(struct bpf_prog *fp,
+ struct sock *sk)
{
struct sock_filter *old_prog;
- struct sk_filter *old_fp;
+ struct bpf_prog *old_fp;
int err, new_len, old_len = fp->len;

/* We are free to overwrite insns et al right here as it
@@ -971,8 +970,8 @@ out_err:
return ERR_PTR(err);
}

-static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
- struct sock *sk)
+static struct bpf_prog *__sk_prepare_filter(struct bpf_prog *fp,
+ struct sock *sk)
{
int err;

@@ -1012,11 +1011,11 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
* If an error occurs or there is insufficient memory for the filter
* a negative errno code is returned. On success the return is zero.
*/
-int sk_unattached_filter_create(struct sk_filter **pfp,
+int sk_unattached_filter_create(struct bpf_prog **pfp,
struct sock_fprog_kern *fprog)
{
unsigned int fsize = sk_filter_proglen(fprog);
- struct sk_filter *fp;
+ struct bpf_prog *fp;

/* Make sure new filter is there and in the right amounts. */
if (fprog->filter == NULL)
@@ -1048,7 +1047,7 @@ int sk_unattached_filter_create(struct sk_filter **pfp,
}
EXPORT_SYMBOL_GPL(sk_unattached_filter_create);

-void sk_unattached_filter_destroy(struct sk_filter *fp)
+void sk_unattached_filter_destroy(struct bpf_prog *fp)
{
sk_filter_release(fp);
}
@@ -1066,7 +1065,7 @@ EXPORT_SYMBOL_GPL(sk_unattached_filter_destroy);
*/
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
- struct sk_filter *fp, *old_fp;
+ struct bpf_prog *fp, *old_fp;
unsigned int fsize = sk_filter_proglen(fprog);
unsigned int sk_fsize = sk_filter_size(fprog->len);
int err;
@@ -1117,7 +1116,7 @@ EXPORT_SYMBOL_GPL(sk_attach_filter);
int sk_detach_filter(struct sock *sk)
{
int ret = -ENOENT;
- struct sk_filter *filter;
+ struct bpf_prog *filter;

if (sock_flag(sk, SOCK_FILTER_LOCKED))
return -EPERM;
@@ -1138,7 +1137,7 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
unsigned int len)
{
struct sock_fprog_kern *fprog;
- struct sk_filter *filter;
+ struct bpf_prog *filter;
int ret = 0;

lock_sock(sk);
diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c
index 12ab7b4be609..8debe826dc1a 100644
--- a/net/core/ptp_classifier.c
+++ b/net/core/ptp_classifier.c
@@ -107,7 +107,7 @@
#include <linux/filter.h>
#include <linux/ptp_classify.h>

-static struct sk_filter *ptp_insns __read_mostly;
+static struct bpf_prog *ptp_insns __read_mostly;

unsigned int ptp_classify_raw(const struct sk_buff *skb)
{
diff --git a/net/core/sock.c b/net/core/sock.c
index ca9b65199d28..af81f1415f0a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1405,7 +1405,7 @@ EXPORT_SYMBOL(sk_alloc);

static void __sk_free(struct sock *sk)
{
- struct sk_filter *filter;
+ struct bpf_prog *filter;

if (sk->sk_destruct)
sk->sk_destruct(sk);
@@ -1481,7 +1481,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)

newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
if (newsk != NULL) {
- struct sk_filter *filter;
+ struct bpf_prog *filter;

sock_copy(newsk, sk);

diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index a4216a4c9572..16072cf03432 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -53,7 +53,7 @@ int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk,
struct sk_buff *skb, int attrtype)
{
struct sock_fprog_kern *fprog;
- struct sk_filter *filter;
+ struct bpf_prog *filter;
struct nlattr *attr;
unsigned int flen;
int err = 0;
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index bbffdbdaf603..85be0c89d640 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -28,7 +28,8 @@ static int bpf_mt_check(const struct xt_mtchk_param *par)
program.len = info->bpf_program_num_elem;
program.filter = info->bpf_program;

- if (sk_unattached_filter_create(&info->filter, &program)) {
+ if (sk_unattached_filter_create((struct bpf_prog **) &info->filter,
+ &program)) {
pr_info("bpf: check failed: parse error\n");
return -EINVAL;
}
@@ -40,13 +41,13 @@ static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_bpf_info *info = par->matchinfo;

- return SK_RUN_FILTER(info->filter, skb);
+ return SK_RUN_FILTER(((struct bpf_prog *) info->filter), skb);
}

static void bpf_mt_destroy(const struct xt_mtdtor_param *par)
{
const struct xt_bpf_info *info = par->matchinfo;
- sk_unattached_filter_destroy(info->filter);
+ sk_unattached_filter_destroy((struct bpf_prog *) info->filter);
}

static struct xt_match bpf_mt_reg __read_mostly = {
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 614ca91f785a..1db01efbd4f2 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1732,7 +1732,7 @@ static unsigned int run_filter(const struct sk_buff *skb,
const struct sock *sk,
unsigned int res)
{
- struct sk_filter *filter;
+ struct bpf_prog *filter;

rcu_read_lock();
filter = rcu_dereference(sk->sk_filter);
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 13f64df2c710..f5a24875e0ff 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -30,7 +30,7 @@ struct cls_bpf_head {
};

struct cls_bpf_prog {
- struct sk_filter *filter;
+ struct bpf_prog *filter;
struct sock_filter *bpf_ops;
struct tcf_exts exts;
struct tcf_result res;
@@ -161,7 +161,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
struct sock_filter *bpf_ops, *bpf_old;
struct tcf_exts exts;
struct sock_fprog_kern tmp;
- struct sk_filter *fp, *fp_old;
+ struct bpf_prog *fp, *fp_old;
u16 bpf_size, bpf_len;
u32 classid;
int ret;
--
1.7.9.5


2014-07-25 11:25:48

by Daniel Borkmann

[permalink] [raw]
Subject: Re: [PATCH net-next] net: filter: rename 'struct sk_filter' to 'struct bpf_prog'

[ also Cc'ing Willem, Pablo ]

On 07/25/2014 10:04 AM, Alexei Starovoitov wrote:
> 'sk_filter' name is used as 'struct sk_filter', function sk_filter() and
> as variable 'sk_filter', which makes code hard to read.
> Also it's easily confused with 'struct sock_filter'
> Rename 'struct sk_filter' to 'struct bpf_prog' to clarify semantics and
> align the name with generic BPF use model.

Agreed, as we went for kernel/bpf/, renaming makes absolutely sense.

> The only ugly place is uapi/linux/netfilter/xt_bpf.h which
> managed to expose kernel internal structure into uapi header.
> Though it shouldn't even compile in user space, preserve the mess by
> adding empty 'struct sk_filter;' there and type cast it to 'struct bpf_prog'
> inside kernel in net/netfilter/xt_bpf.c
>
> Signed-off-by: Alexei Starovoitov <[email protected]>
> ---
>
> alternative fix for xt_bpf.h could be to replace:
> /* only used in the kernel */
> struct sk_filter *filter __attribute__((aligned(8)));
> with
> /* only used in the kernel */
> void *filter __attribute__((aligned(8)));
>
> but this 'void *' approach may further break broken userspace,
> whereas the fix implemented here is more seamless.

Yep, that's not good, 'struct sk_filter' should never have been in a uapi
file actually.

I think your current approach here, as you say, is more seamless, but as
the struct itself is *only* hidden inside kernel space, and there's no way
anyone can mess around with it, we might as well go for the more correct
void pointer, imho, it won't change anything in the structure size at least.

I guess the alignment in xt_bpf_info is for 32bit user space w/ 64bit kernel
space? I haven't looked so far into how exactly x_tables transfers that back
to user space, but are we effectively _leaking_ a kernel address after we
called sk_unattached_filter_create(&info->filter, ...) when dumping back to
user space? I guess for a possible leak in the _padding_ of the structure,
we might be copying gargabe from user space to kernel and back, that might
be less problematic, I think.

> Tested on x64, arm, sparc

[ Rest of the patch looks good, thanks. ]

2014-07-25 11:54:36

by Pablo Neira Ayuso

[permalink] [raw]
Subject: Re: [PATCH net-next] net: filter: rename 'struct sk_filter' to 'struct bpf_prog'

On Fri, Jul 25, 2014 at 01:25:35PM +0200, Daniel Borkmann wrote:
> [ also Cc'ing Willem, Pablo ]
>
> On 07/25/2014 10:04 AM, Alexei Starovoitov wrote:
> >'sk_filter' name is used as 'struct sk_filter', function sk_filter() and
> >as variable 'sk_filter', which makes code hard to read.
> >Also it's easily confused with 'struct sock_filter'
> >Rename 'struct sk_filter' to 'struct bpf_prog' to clarify semantics and
> >align the name with generic BPF use model.
>
> Agreed, as we went for kernel/bpf/, renaming makes absolutely sense.

My nft socket filtering changes are accomodated into struct sk_filter,
and will still be, so I still need some generic name there...

Please, leave this as it is.

> >The only ugly place is uapi/linux/netfilter/xt_bpf.h which
> >managed to expose kernel internal structure into uapi header.
> >Though it shouldn't even compile in user space, preserve the mess by
> >adding empty 'struct sk_filter;' there and type cast it to 'struct bpf_prog'
> >inside kernel in net/netfilter/xt_bpf.c
> >
> >Signed-off-by: Alexei Starovoitov <[email protected]>
> >---
> >
> >alternative fix for xt_bpf.h could be to replace:
> > /* only used in the kernel */
> > struct sk_filter *filter __attribute__((aligned(8)));
> >with
> > /* only used in the kernel */
> > void *filter __attribute__((aligned(8)));
> >
> >but this 'void *' approach may further break broken userspace,
> >whereas the fix implemented here is more seamless.
>
> Yep, that's not good, 'struct sk_filter' should never have been in a uapi
> file actually.

You can just send me a patch to change it to void. It's an internal
kernel pointer as the comment states. There is **no** way that
userspace can lurk with that from iptables at all.

2014-07-25 13:00:26

by Daniel Borkmann

[permalink] [raw]
Subject: Re: [PATCH net-next] net: filter: rename 'struct sk_filter' to 'struct bpf_prog'

On 07/25/2014 01:54 PM, Pablo Neira Ayuso wrote:
> On Fri, Jul 25, 2014 at 01:25:35PM +0200, Daniel Borkmann wrote:
>> [ also Cc'ing Willem, Pablo ]
>>
>> On 07/25/2014 10:04 AM, Alexei Starovoitov wrote:
>>> 'sk_filter' name is used as 'struct sk_filter', function sk_filter() and
>>> as variable 'sk_filter', which makes code hard to read.
>>> Also it's easily confused with 'struct sock_filter'
>>> Rename 'struct sk_filter' to 'struct bpf_prog' to clarify semantics and
>>> align the name with generic BPF use model.
>>
>> Agreed, as we went for kernel/bpf/, renaming makes absolutely sense.
>
> My nft socket filtering changes are accomodated into struct sk_filter,
> and will still be, so I still need some generic name there...

All the parts from filter.c which is BPF's core engine have been moved
into kernel/bpf/ to get it ready for tracing et al, since there is not
always a socket context anymore. The *whole* infrastructure around struct
sk_filter is [e]BPF and used in non-net related contexts as well, whereas
nft socket filtering is *only* for sockets. Due to the socket-only specific
use case why doesn't it make more sense to have a union in struct sock
around sk_filter (or however we name it) and only allow one of the two
being loaded on a socket?

2014-07-25 13:54:22

by Willem de Bruijn

[permalink] [raw]
Subject: Re: [PATCH net-next] net: filter: rename 'struct sk_filter' to 'struct bpf_prog'

>> >alternative fix for xt_bpf.h could be to replace:
>> > /* only used in the kernel */
>> > struct sk_filter *filter __attribute__((aligned(8)));
>> >with
>> > /* only used in the kernel */
>> > void *filter __attribute__((aligned(8)));
>> >
>> >but this 'void *' approach may further break broken userspace,
>> >whereas the fix implemented here is more seamless.
>>
>> Yep, that's not good, 'struct sk_filter' should never have been in a uapi
>> file actually.

This follows a convention in include/uapi/linux/netfilter/*.h that
likely predates the introduction of uapi. A search for "Used
internally by the kernel" shows many more examples. I should not have
included filter.h, however. The common behavior when using pointers
to kernel-internal structures is to have a forward declaration. I suggest
making that change, instead of changing to void *. This avoids having
to add casts where xt_bpf_info is used in net/netfilter/xt_bpf.c:

-#include <linux/filter.h>
#include <linux/types.h>

#define XT_BPF_MAX_NUM_INSTR 64

+struct sk_filter;
+
struct xt_bpf_info {

I can send this as a separate patch to net-next, if that helps.

> You can just send me a patch to change it to void. It's an internal
> kernel pointer as the comment states. There is **no** way that
> userspace can lurk with that from iptables at all.

2014-07-25 17:24:32

by Alexei Starovoitov

[permalink] [raw]
Subject: Re: [PATCH net-next] net: filter: rename 'struct sk_filter' to 'struct bpf_prog'

On Fri, Jul 25, 2014 at 6:00 AM, Daniel Borkmann <[email protected]> wrote:
> On 07/25/2014 01:54 PM, Pablo Neira Ayuso wrote:
>>
>> On Fri, Jul 25, 2014 at 01:25:35PM +0200, Daniel Borkmann wrote:
>>>
>>> [ also Cc'ing Willem, Pablo ]
>>>
>>> On 07/25/2014 10:04 AM, Alexei Starovoitov wrote:
>>>>
>>>> 'sk_filter' name is used as 'struct sk_filter', function sk_filter() and
>>>> as variable 'sk_filter', which makes code hard to read.
>>>> Also it's easily confused with 'struct sock_filter'
>>>> Rename 'struct sk_filter' to 'struct bpf_prog' to clarify semantics and
>>>> align the name with generic BPF use model.
>>>
>>>
>>> Agreed, as we went for kernel/bpf/, renaming makes absolutely sense.
>>
>>
>> My nft socket filtering changes are accomodated into struct sk_filter,
>> and will still be, so I still need some generic name there...
>
>
> All the parts from filter.c which is BPF's core engine have been moved
> into kernel/bpf/ to get it ready for tracing et al, since there is not
> always a socket context anymore. The *whole* infrastructure around struct
> sk_filter is [e]BPF and used in non-net related contexts as well, whereas
> nft socket filtering is *only* for sockets. Due to the socket-only specific
> use case why doesn't it make more sense to have a union in struct sock
> around sk_filter (or however we name it) and only allow one of the two
> being loaded on a socket?

yep.
Adding nft specific things to struct sk_filter/bpf_prog is not correct,
since this struct is already part of seccomp and will be used
in net-less configurations. SK_RUN_FILTER() macro will also be
renamed into something like RUN_BPF_RPOG(). It's one and only
way to invoke eBPF programs. Adding nft selector cannot work,
since eBPF is used with generic context whereas nft is skb specific.
If you want to add nft filtering capabilities to sockets, you'd need
to add union around 'struct bpf_prog' inside 'struct sock', which will be
much cleaner way.

2014-07-25 17:27:18

by Alexei Starovoitov

[permalink] [raw]
Subject: Re: [PATCH net-next] net: filter: rename 'struct sk_filter' to 'struct bpf_prog'

On Fri, Jul 25, 2014 at 6:53 AM, Willem de Bruijn <[email protected]> wrote:
>>> >alternative fix for xt_bpf.h could be to replace:
>>> > /* only used in the kernel */
>>> > struct sk_filter *filter __attribute__((aligned(8)));
>>> >with
>>> > /* only used in the kernel */
>>> > void *filter __attribute__((aligned(8)));
>>> >
>>> >but this 'void *' approach may further break broken userspace,
>>> >whereas the fix implemented here is more seamless.
>>>
>>> Yep, that's not good, 'struct sk_filter' should never have been in a uapi
>>> file actually.
>
> This follows a convention in include/uapi/linux/netfilter/*.h that
> likely predates the introduction of uapi. A search for "Used
> internally by the kernel" shows many more examples. I should not have
> included filter.h, however. The common behavior when using pointers
> to kernel-internal structures is to have a forward declaration. I suggest
> making that change, instead of changing to void *. This avoids having
> to add casts where xt_bpf_info is used in net/netfilter/xt_bpf.c:

that will not avoid typecast.
Either 'void *' approach or extra 'struct sk_filter;' approach, both need
type casts to 'struct bpf_prog' in xt_bpf.c
(because of SK_RUN_FILTER macro)
Therefore I prefer extra 'struct sk_filter;' approach.

> -#include <linux/filter.h>
> #include <linux/types.h>
>
> #define XT_BPF_MAX_NUM_INSTR 64
>
> +struct sk_filter;
> +
> struct xt_bpf_info {
>
> I can send this as a separate patch to net-next, if that helps.

2014-07-25 18:32:53

by Willem de Bruijn

[permalink] [raw]
Subject: Re: [PATCH net-next] net: filter: rename 'struct sk_filter' to 'struct bpf_prog'

>> This follows a convention in include/uapi/linux/netfilter/*.h that
>> likely predates the introduction of uapi. A search for "Used
>> internally by the kernel" shows many more examples. I should not have
>> included filter.h, however. The common behavior when using pointers
>> to kernel-internal structures is to have a forward declaration. I suggest
>> making that change, instead of changing to void *. This avoids having
>> to add casts where xt_bpf_info is used in net/netfilter/xt_bpf.c:
>
> that will not avoid typecast.
> Either 'void *' approach or extra 'struct sk_filter;' approach, both need
> type casts to 'struct bpf_prog' in xt_bpf.c
> (because of SK_RUN_FILTER macro)
> Therefore I prefer extra 'struct sk_filter;' approach.

I hadn't noticed that your patch makes the same change that I
proposed. Nothing in userspace should touch that pointer, so it is
fine to change its type to struct bpf_prog* at the same time. No need
for typecasts.

2014-07-25 18:43:09

by Alexei Starovoitov

[permalink] [raw]
Subject: Re: [PATCH net-next] net: filter: rename 'struct sk_filter' to 'struct bpf_prog'

On Fri, Jul 25, 2014 at 11:32 AM, Willem de Bruijn <[email protected]> wrote:
>>> This follows a convention in include/uapi/linux/netfilter/*.h that
>>> likely predates the introduction of uapi. A search for "Used
>>> internally by the kernel" shows many more examples. I should not have
>>> included filter.h, however. The common behavior when using pointers
>>> to kernel-internal structures is to have a forward declaration. I suggest
>>> making that change, instead of changing to void *. This avoids having
>>> to add casts where xt_bpf_info is used in net/netfilter/xt_bpf.c:
>>
>> that will not avoid typecast.
>> Either 'void *' approach or extra 'struct sk_filter;' approach, both need
>> type casts to 'struct bpf_prog' in xt_bpf.c
>> (because of SK_RUN_FILTER macro)
>> Therefore I prefer extra 'struct sk_filter;' approach.
>
> I hadn't noticed that your patch makes the same change that I
> proposed. Nothing in userspace should touch that pointer, so it is
> fine to change its type to struct bpf_prog* at the same time. No need
> for typecasts.

really? I don't think it's a good idea to expose kernel struct type
to user space. How is it even going to compile?
#include <linux/filter.h> brings different files in kernel and in user space.
struct bpf_prog is undefined in user space and compiler will complain.
Adding 'struct bpf_prog;' will be ugly.
imo the lesser evil is adding 'struct sk_filter;' and doing type casts
in kernel.

2014-07-25 18:51:09

by Willem de Bruijn

[permalink] [raw]
Subject: Re: [PATCH net-next] net: filter: rename 'struct sk_filter' to 'struct bpf_prog'

On Fri, Jul 25, 2014 at 2:43 PM, Alexei Starovoitov <[email protected]> wrote:
> On Fri, Jul 25, 2014 at 11:32 AM, Willem de Bruijn <[email protected]> wrote:
>>>> This follows a convention in include/uapi/linux/netfilter/*.h that
>>>> likely predates the introduction of uapi. A search for "Used
>>>> internally by the kernel" shows many more examples. I should not have
>>>> included filter.h, however. The common behavior when using pointers
>>>> to kernel-internal structures is to have a forward declaration. I suggest
>>>> making that change, instead of changing to void *. This avoids having
>>>> to add casts where xt_bpf_info is used in net/netfilter/xt_bpf.c:
>>>
>>> that will not avoid typecast.
>>> Either 'void *' approach or extra 'struct sk_filter;' approach, both need
>>> type casts to 'struct bpf_prog' in xt_bpf.c
>>> (because of SK_RUN_FILTER macro)
>>> Therefore I prefer extra 'struct sk_filter;' approach.
>>
>> I hadn't noticed that your patch makes the same change that I
>> proposed. Nothing in userspace should touch that pointer, so it is
>> fine to change its type to struct bpf_prog* at the same time. No need
>> for typecasts.
>
> really? I don't think it's a good idea to expose kernel struct type
> to user space. How is it even going to compile?

a forward declaration.

> #include <linux/filter.h> brings different files in kernel and in user space.
> struct bpf_prog is undefined in user space and compiler will complain.
> Adding 'struct bpf_prog;' will be ugly.
> imo the lesser evil is adding 'struct sk_filter;' and doing type casts
> in kernel.

but the exact same argument applies to sk_filter. If that struct is
renamed everywhere else, then the result will only be more confusing.
A forward declaration is the standard workaround to all such cases in
include/uapi/linux/netfilter. See for instance xt_connlimit.h. This is
sufficient to allow userspace build to succeed, without exposing any
kernel structure detail. If you don't even want to leak the name, then
let's make it void *. Keeping a declaration for sk_filter, while
sk_filter is renamed everywhere else is the least good option, in my
opinion.

2014-07-25 18:58:47

by Alexei Starovoitov

[permalink] [raw]
Subject: Re: [PATCH net-next] net: filter: rename 'struct sk_filter' to 'struct bpf_prog'

On Fri, Jul 25, 2014 at 11:50 AM, Willem de Bruijn <[email protected]> wrote:
> On Fri, Jul 25, 2014 at 2:43 PM, Alexei Starovoitov <[email protected]> wrote:
>> On Fri, Jul 25, 2014 at 11:32 AM, Willem de Bruijn <[email protected]> wrote:
>>>>> This follows a convention in include/uapi/linux/netfilter/*.h that
>>>>> likely predates the introduction of uapi. A search for "Used
>>>>> internally by the kernel" shows many more examples. I should not have
>>>>> included filter.h, however. The common behavior when using pointers
>>>>> to kernel-internal structures is to have a forward declaration. I suggest
>>>>> making that change, instead of changing to void *. This avoids having
>>>>> to add casts where xt_bpf_info is used in net/netfilter/xt_bpf.c:
>>>>
>>>> that will not avoid typecast.
>>>> Either 'void *' approach or extra 'struct sk_filter;' approach, both need
>>>> type casts to 'struct bpf_prog' in xt_bpf.c
>>>> (because of SK_RUN_FILTER macro)
>>>> Therefore I prefer extra 'struct sk_filter;' approach.
>>>
>>> I hadn't noticed that your patch makes the same change that I
>>> proposed. Nothing in userspace should touch that pointer, so it is
>>> fine to change its type to struct bpf_prog* at the same time. No need
>>> for typecasts.
>>
>> really? I don't think it's a good idea to expose kernel struct type
>> to user space. How is it even going to compile?
>
> a forward declaration.
>
>> #include <linux/filter.h> brings different files in kernel and in user space.
>> struct bpf_prog is undefined in user space and compiler will complain.
>> Adding 'struct bpf_prog;' will be ugly.
>> imo the lesser evil is adding 'struct sk_filter;' and doing type casts
>> in kernel.
>
> but the exact same argument applies to sk_filter. If that struct is
> renamed everywhere else, then the result will only be more confusing.
> A forward declaration is the standard workaround to all such cases in
> include/uapi/linux/netfilter. See for instance xt_connlimit.h. This is
> sufficient to allow userspace build to succeed, without exposing any
> kernel structure detail. If you don't even want to leak the name, then
> let's make it void *. Keeping a declaration for sk_filter, while
> sk_filter is renamed everywhere else is the least good option, in my
> opinion.

well, since you the author of this bit and you're ok with 'void *', I'm ok
with it too :) Just typecast in kernel is still needed because of
SK_RUN_FILTER() macro...

2014-07-25 19:02:15

by Alexei Starovoitov

[permalink] [raw]
Subject: Re: [PATCH net-next] net: filter: rename 'struct sk_filter' to 'struct bpf_prog'

On Fri, Jul 25, 2014 at 11:58 AM, Alexei Starovoitov <[email protected]> wrote:
> On Fri, Jul 25, 2014 at 11:50 AM, Willem de Bruijn <[email protected]> wrote:
>> On Fri, Jul 25, 2014 at 2:43 PM, Alexei Starovoitov <[email protected]> wrote:
>>> On Fri, Jul 25, 2014 at 11:32 AM, Willem de Bruijn <[email protected]> wrote:
>>>>>> This follows a convention in include/uapi/linux/netfilter/*.h that
>>>>>> likely predates the introduction of uapi. A search for "Used
>>>>>> internally by the kernel" shows many more examples. I should not have
>>>>>> included filter.h, however. The common behavior when using pointers
>>>>>> to kernel-internal structures is to have a forward declaration. I suggest
>>>>>> making that change, instead of changing to void *. This avoids having
>>>>>> to add casts where xt_bpf_info is used in net/netfilter/xt_bpf.c:
>>>>>
>>>>> that will not avoid typecast.
>>>>> Either 'void *' approach or extra 'struct sk_filter;' approach, both need
>>>>> type casts to 'struct bpf_prog' in xt_bpf.c
>>>>> (because of SK_RUN_FILTER macro)
>>>>> Therefore I prefer extra 'struct sk_filter;' approach.
>>>>
>>>> I hadn't noticed that your patch makes the same change that I
>>>> proposed. Nothing in userspace should touch that pointer, so it is
>>>> fine to change its type to struct bpf_prog* at the same time. No need
>>>> for typecasts.
>>>
>>> really? I don't think it's a good idea to expose kernel struct type
>>> to user space. How is it even going to compile?
>>
>> a forward declaration.
>>
>>> #include <linux/filter.h> brings different files in kernel and in user space.
>>> struct bpf_prog is undefined in user space and compiler will complain.
>>> Adding 'struct bpf_prog;' will be ugly.
>>> imo the lesser evil is adding 'struct sk_filter;' and doing type casts
>>> in kernel.
>>
>> but the exact same argument applies to sk_filter. If that struct is
>> renamed everywhere else, then the result will only be more confusing.
>> A forward declaration is the standard workaround to all such cases in
>> include/uapi/linux/netfilter. See for instance xt_connlimit.h. This is
>> sufficient to allow userspace build to succeed, without exposing any
>> kernel structure detail. If you don't even want to leak the name, then
>> let's make it void *. Keeping a declaration for sk_filter, while
>> sk_filter is renamed everywhere else is the least good option, in my
>> opinion.
>
> well, since you the author of this bit and you're ok with 'void *', I'm ok
> with it too :) Just typecast in kernel is still needed because of
> SK_RUN_FILTER() macro...

just tried with 'void *', actually all three type casts are needed...

2014-07-25 22:17:14

by Pablo Neira Ayuso

[permalink] [raw]
Subject: Re: [PATCH net-next] net: filter: rename 'struct sk_filter' to 'struct bpf_prog'

On Fri, Jul 25, 2014 at 10:24:29AM -0700, Alexei Starovoitov wrote:
> On Fri, Jul 25, 2014 at 6:00 AM, Daniel Borkmann <[email protected]> wrote:
> > On 07/25/2014 01:54 PM, Pablo Neira Ayuso wrote:
> >>
> >> On Fri, Jul 25, 2014 at 01:25:35PM +0200, Daniel Borkmann wrote:
> >>>
> >>> [ also Cc'ing Willem, Pablo ]
> >>>
> >>> On 07/25/2014 10:04 AM, Alexei Starovoitov wrote:
> >>>>
> >>>> 'sk_filter' name is used as 'struct sk_filter', function sk_filter() and
> >>>> as variable 'sk_filter', which makes code hard to read.
> >>>> Also it's easily confused with 'struct sock_filter'
> >>>> Rename 'struct sk_filter' to 'struct bpf_prog' to clarify semantics and
> >>>> align the name with generic BPF use model.
> >>>
> >>>
> >>> Agreed, as we went for kernel/bpf/, renaming makes absolutely sense.
> >>
> >>
> >> My nft socket filtering changes are accomodated into struct sk_filter,
> >> and will still be, so I still need some generic name there...
> >
> >
> > All the parts from filter.c which is BPF's core engine have been moved
> > into kernel/bpf/ to get it ready for tracing et al, since there is not
> > always a socket context anymore. The *whole* infrastructure around struct
> > sk_filter is [e]BPF and used in non-net related contexts as well, whereas
> > nft socket filtering is *only* for sockets. Due to the socket-only specific
> > use case why doesn't it make more sense to have a union in struct sock
> > around sk_filter (or however we name it) and only allow one of the two
> > being loaded on a socket?
>
> yep.
> Adding nft specific things to struct sk_filter/bpf_prog is not correct,
> since this struct is already part of seccomp and will be used
> in net-less configurations. SK_RUN_FILTER() macro will also be
> renamed into something like RUN_BPF_RPOG(). It's one and only
> way to invoke eBPF programs. Adding nft selector cannot work,
> since eBPF is used with generic context whereas nft is skb specific.
> If you want to add nft filtering capabilities to sockets, you'd need
> to add union around 'struct bpf_prog' inside 'struct sock', which will be
> much cleaner way.

The struct sk_filter is almost providing the generic framework, it
just needs to be generalized, a quick layout for it:

struct sk_filter {
struct sk_filter_cb *cb;
atomic_t refcnt;
struct rcu_head head;
char data[0]; /* here, you specific struct bpf_prog */
};

The refcnt is required sk_filter_{charge,uncharge,release}. The struct
rcu_head is also need from sk_filter_release().

struct sk_filter_cb {
int type;
struct module *me;
void (*charge)(struct sock *sk, struct sk_filter *fp);
void (*uncharge)(struct sock *sk, struct sk_filter *fp);
unsigned int (*run_filter)(struct sk_filter *fp, struct sk_buff *skb);
};

We have to provide the register/unregister functions for the specific
callbacks depending on the socket filtering approach. But I'll have to
introduce this myself when I come up with the nft patches again.

So meanwhile, you should just encapsulate what really belongs to
struct bpf_prog, ie. size, bytecode, jitted, etc. and leave struct
sk_filter in place.

struct sk_filter {
atomic_t refcnt;
struct rcu_head head;
u32 len;
struct bpf_prog bpf_prog;
};

The len will go into struct bpf_prog once the generic infrastructure
above is introduced since the semantics (number of blocks) is
different from nft.

If you straight forward rename the entire structure, you'll take
things that are not specific from bpf such as refcnt and rcu_head.

2014-07-25 22:20:49

by Pablo Neira Ayuso

[permalink] [raw]
Subject: Re: [PATCH net-next] net: filter: rename 'struct sk_filter' to 'struct bpf_prog'

On Fri, Jul 25, 2014 at 02:50:32PM -0400, Willem de Bruijn wrote:
> On Fri, Jul 25, 2014 at 2:43 PM, Alexei Starovoitov <[email protected]> wrote:
> > On Fri, Jul 25, 2014 at 11:32 AM, Willem de Bruijn <[email protected]> wrote:
> >>>> This follows a convention in include/uapi/linux/netfilter/*.h that
> >>>> likely predates the introduction of uapi. A search for "Used
> >>>> internally by the kernel" shows many more examples. I should not have
> >>>> included filter.h, however. The common behavior when using pointers
> >>>> to kernel-internal structures is to have a forward declaration. I suggest
> >>>> making that change, instead of changing to void *. This avoids having
> >>>> to add casts where xt_bpf_info is used in net/netfilter/xt_bpf.c:
> >>>
> >>> that will not avoid typecast.
> >>> Either 'void *' approach or extra 'struct sk_filter;' approach, both need
> >>> type casts to 'struct bpf_prog' in xt_bpf.c
> >>> (because of SK_RUN_FILTER macro)
> >>> Therefore I prefer extra 'struct sk_filter;' approach.
> >>
> >> I hadn't noticed that your patch makes the same change that I
> >> proposed. Nothing in userspace should touch that pointer, so it is
> >> fine to change its type to struct bpf_prog* at the same time. No need
> >> for typecasts.
> >
> > really? I don't think it's a good idea to expose kernel struct type
> > to user space. How is it even going to compile?
>
> a forward declaration.
>
> > #include <linux/filter.h> brings different files in kernel and in user space.
> > struct bpf_prog is undefined in user space and compiler will complain.
> > Adding 'struct bpf_prog;' will be ugly.
> > imo the lesser evil is adding 'struct sk_filter;' and doing type casts
> > in kernel.
>
> but the exact same argument applies to sk_filter. If that struct is
> renamed everywhere else, then the result will only be more confusing.
> A forward declaration is the standard workaround to all such cases in
> include/uapi/linux/netfilter. See for instance xt_connlimit.h. This is
> sufficient to allow userspace build to succeed, without exposing any
> kernel structure detail. If you don't even want to leak the name, then
> let's make it void *. Keeping a declaration for sk_filter, while
> sk_filter is renamed everywhere else is the least good option, in my
> opinion.

Please, send me a patch to remove that include <net/filter.h> from the
uapi header and define struct sk_filter; so we save the typecast in
xt_bpf.c

The struct sk_filter; doesn't expose anything relevant since, even
assuming userspace knows the layout, it can *not* do anything useful
with that.

Thanks.

2014-07-27 05:41:09

by Alexei Starovoitov

[permalink] [raw]
Subject: Re: [PATCH net-next] net: filter: rename 'struct sk_filter' to 'struct bpf_prog'

On Fri, Jul 25, 2014 at 3:17 PM, Pablo Neira Ayuso <[email protected]> wrote:
> On Fri, Jul 25, 2014 at 10:24:29AM -0700, Alexei Starovoitov wrote:
>> On Fri, Jul 25, 2014 at 6:00 AM, Daniel Borkmann <[email protected]> wrote:
>> > On 07/25/2014 01:54 PM, Pablo Neira Ayuso wrote:
>> >>
>> >> On Fri, Jul 25, 2014 at 01:25:35PM +0200, Daniel Borkmann wrote:
>> >>>
>> >>> [ also Cc'ing Willem, Pablo ]
>> >>>
>> >>> On 07/25/2014 10:04 AM, Alexei Starovoitov wrote:
>> >>>>
>> >>>> 'sk_filter' name is used as 'struct sk_filter', function sk_filter() and
>> >>>> as variable 'sk_filter', which makes code hard to read.
>> >>>> Also it's easily confused with 'struct sock_filter'
>> >>>> Rename 'struct sk_filter' to 'struct bpf_prog' to clarify semantics and
>> >>>> align the name with generic BPF use model.
>> >>>
>> >>>
>> >>> Agreed, as we went for kernel/bpf/, renaming makes absolutely sense.
>> >>
>> >>
>> >> My nft socket filtering changes are accomodated into struct sk_filter,
>> >> and will still be, so I still need some generic name there...
>> >
>> >
>> > All the parts from filter.c which is BPF's core engine have been moved
>> > into kernel/bpf/ to get it ready for tracing et al, since there is not
>> > always a socket context anymore. The *whole* infrastructure around struct
>> > sk_filter is [e]BPF and used in non-net related contexts as well, whereas
>> > nft socket filtering is *only* for sockets. Due to the socket-only specific
>> > use case why doesn't it make more sense to have a union in struct sock
>> > around sk_filter (or however we name it) and only allow one of the two
>> > being loaded on a socket?
>>
>> yep.
>> Adding nft specific things to struct sk_filter/bpf_prog is not correct,
>> since this struct is already part of seccomp and will be used
>> in net-less configurations. SK_RUN_FILTER() macro will also be
>> renamed into something like RUN_BPF_RPOG(). It's one and only
>> way to invoke eBPF programs. Adding nft selector cannot work,
>> since eBPF is used with generic context whereas nft is skb specific.
>> If you want to add nft filtering capabilities to sockets, you'd need
>> to add union around 'struct bpf_prog' inside 'struct sock', which will be
>> much cleaner way.
>
> The struct sk_filter is almost providing the generic framework, it
> just needs to be generalized, a quick layout for it:
>
> struct sk_filter {
> struct sk_filter_cb *cb;
> atomic_t refcnt;
> struct rcu_head head;
> char data[0]; /* here, you specific struct bpf_prog */
> };
>
> The refcnt is required sk_filter_{charge,uncharge,release}. The struct
> rcu_head is also need from sk_filter_release().
>
> struct sk_filter_cb {
> int type;
> struct module *me;
> void (*charge)(struct sock *sk, struct sk_filter *fp);
> void (*uncharge)(struct sock *sk, struct sk_filter *fp);
> unsigned int (*run_filter)(struct sk_filter *fp, struct sk_buff *skb);
> };

Pablo,

I don't think you understand the scope of BPF.
'struct module *'? to attach nft to sockets? ouch.
if you need to do it, go ahead, but please don't present such mess
as a 'generic' infra.
What you're proposing won't work even for classic bpf.
'struct sock *' and 'struct module *' are totally redundant and won't
work for seccomp, tracing and pretty much everything, but pure
socket filtering.
Somehow you think that 'struct sk_filter' is only used in sockets.
That is not the case. Just grep the git.
and arguing that 'struct sk_filter' needs to be generalized for nft
is shortsighted as minimum.
Every piece of code that is using 'struct sk_filter' knows that it's
dealing with bpf/ebpf, so renaming is not changing the facts,
but rather stating the obvious and making code easier to
understand.

> We have to provide the register/unregister functions for the specific
> callbacks depending on the socket filtering approach. But I'll have to
> introduce this myself when I come up with the nft patches again.

yes, please do introduce modules for nft only, since it doesn't
make sense for sockets and for bpf.

> So meanwhile, you should just encapsulate what really belongs to
> struct bpf_prog, ie. size, bytecode, jitted, etc. and leave struct
> sk_filter in place.
>
> struct sk_filter {
> atomic_t refcnt;
> struct rcu_head head;
> u32 len;
> struct bpf_prog bpf_prog;
> };
>
> The len will go into struct bpf_prog once the generic infrastructure
> above is introduced since the semantics (number of blocks) is
> different from nft.
>
> If you straight forward rename the entire structure, you'll take
> things that are not specific from bpf such as refcnt and rcu_head.

sorry Pablo, I don't think you understand what you're talking about.
refcnt and rcu_head are key parts of bpf/ebpf infra.
Programs are refcnt'ed not only by charging sockets. BPF is relying
on rcu for safe execution as well.
For example see my tracing filter patch that does:
void trace_filter_call_bpf(struct event_filter *filter, void *ctx)
{
rcu_read_lock();
SK_RUN_FILTER(filter->prog, ctx);
rcu_read_unlock();
}
One eBPF program can be attached to multiple 'events' where event
is kprobe, tracepoint, syscall, etc The act of attaching increments
'struct sk_filter/bpf_prog -> refcnt'.
Every field of 'struct sk_filter' is used by BPF infra (including 'len'
and 'work') This struct is a definition of bpf program.
Seriously if you want to understand, just ask, but objecting due to
lack of understanding just silly.

2014-07-28 21:45:48

by Pablo Neira Ayuso

[permalink] [raw]
Subject: Re: [PATCH net-next] net: filter: rename 'struct sk_filter' to 'struct bpf_prog'

On Sat, Jul 26, 2014 at 10:41:04PM -0700, Alexei Starovoitov wrote:
> On Fri, Jul 25, 2014 at 3:17 PM, Pablo Neira Ayuso <[email protected]> wrote:
> > The struct sk_filter is almost providing the generic framework, it
> > just needs to be generalized, a quick layout for it:
> >
> > struct sk_filter {
> > struct sk_filter_cb *cb;
> > atomic_t refcnt;
> > struct rcu_head head;
> > char data[0]; /* here, you specific struct bpf_prog */
> > };
> >
> > The refcnt is required sk_filter_{charge,uncharge,release}. The struct
> > rcu_head is also need from sk_filter_release().
> >
> > struct sk_filter_cb {
> > int type;
> > struct module *me;
> > void (*charge)(struct sock *sk, struct sk_filter *fp);
> > void (*uncharge)(struct sock *sk, struct sk_filter *fp);
> > unsigned int (*run_filter)(struct sk_filter *fp, struct sk_buff *skb);
> > };
>
> Pablo,
>
> I don't think you understand the scope of BPF.
> 'struct module *'? to attach nft to sockets? ouch.

The idea is that there will be one sk_filter_cb per socket filtering
approach. The structure module is just there in case one of the
approach is loadable as kernel module, it's the typical code pattern
in the kernel. You can git grep for similar code.

> if you need to do it, go ahead, but please don't present such mess
> as a 'generic' infra.

This is extracted from one of your recent patches:

void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
{
- atomic_sub(sk_filter_size(fp->len), &sk->sk_omem_alloc);
+ if (!fp->ebpf)
+ atomic_sub(sk_filter_size(fp->len), &sk->sk_omem_alloc);
sk_filter_release(fp);
}

void sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
atomic_inc(&fp->refcnt);
- atomic_add(sk_filter_size(fp->len), &sk->sk_omem_alloc);
+ if (!fp->ebpf)
+ atomic_add(sk_filter_size(fp->len), &sk->sk_omem_alloc);
}

Basically, that looks to me like two different socket filtering
approach. You only have to define the struct sock_filter_cb, set the
fields to point to the functions and register the approach in the
socket filtering engine. That will allow a simple way to look up the
filtering approach when loading the filter from userspace.

[...]
> What you're proposing won't work even for classic bpf.
> 'struct sock *' and 'struct module *' are totally redundant and won't
> work for seccomp, tracing and pretty much everything, but pure
> socket filtering.
> Somehow you think that 'struct sk_filter' is only used in sockets.
[...]
> > If you straight forward rename the entire structure, you'll take
> > things that are not specific from bpf such as refcnt and rcu_head.
>
> sorry Pablo, I don't think you understand what you're talking about.
> refcnt and rcu_head are key parts of bpf/ebpf infra.
> Programs are refcnt'ed not only by charging sockets. BPF is relying
> on rcu for safe execution as well.
> For example see my tracing filter patch that does:
> void trace_filter_call_bpf(struct event_filter *filter, void *ctx)
> {
> rcu_read_lock();
> SK_RUN_FILTER(filter->prog, ctx);
> rcu_read_unlock();
> }
> One eBPF program can be attached to multiple 'events' where event
> is kprobe, tracepoint, syscall, etc The act of attaching increments
> 'struct sk_filter/bpf_prog -> refcnt'.

By quick git grepping you already can find clients of this that do not
need rcu / refcount: cls_bpf.c, net_cls.c, xt_bpf.c and
ptp_classifier. Moreover, I only see the refcnt bumped from
sk_filter_charge(), I didn't find it neither in git nor in your
patches. I don't think rcu_head and refcnt are really part of the
filter *in any case*, they just provide the way to link/unlink objects
in a safe way in some situations.

By renaming this, you're not fixing up things the semantics. It seems
to me you just want to find a quick path to solve inconsistencies in
your code.

2014-07-29 00:12:17

by David Miller

[permalink] [raw]
Subject: Re: [PATCH net-next] net: filter: rename 'struct sk_filter' to 'struct bpf_prog'

From: Pablo Neira Ayuso <[email protected]>
Date: Mon, 28 Jul 2014 23:45:52 +0200

> By renaming this, you're not fixing up things the semantics. It seems
> to me you just want to find a quick path to solve inconsistencies in
> your code.

Agreed, this looks just like messing around with naming to me.

But to the original issue, that of xt_bpf, I wonder about a few things:

1) If we have a kernel pointer embedded in a user provided datastructure,
what takes care of 32-bit compat applications uploading xt_bpf rules
on a 64-bit kernel? Won't the size be wrong or does it not matter
and is in some way helped by that 8-byte alignment thing there?

2) The user can't care about the type of "filter" in xt_bpf_info, so
we can use whatever name we want for the type.

Therefore you can just do something like:


struct bpf_prog;

struct xt_bpf_info {
__u16 bpf_program_num_elem;
struct sock_filter bpf_program[XT_BPF_MAX_NUM_INSTR];

/* only used in the kernel */
struct bpf_prog *filter __attribute__((aligned(8)));
};

and then you won't need any casting.

2014-07-29 01:12:12

by Alexei Starovoitov

[permalink] [raw]
Subject: Re: [PATCH net-next] net: filter: rename 'struct sk_filter' to 'struct bpf_prog'

On Mon, Jul 28, 2014 at 2:45 PM, Pablo Neira Ayuso <[email protected]> wrote:
>> > struct sk_filter_cb {
>> > int type;
>> > struct module *me;
>> > void (*charge)(struct sock *sk, struct sk_filter *fp);
>> > void (*uncharge)(struct sock *sk, struct sk_filter *fp);
>> > unsigned int (*run_filter)(struct sk_filter *fp, struct sk_buff *skb);
>> > };
>>
>> Pablo,
>>
>> I don't think you understand the scope of BPF.
>> 'struct module *'? to attach nft to sockets? ouch.
>
> The idea is that there will be one sk_filter_cb per socket filtering
> approach. The structure module is just there in case one of the
> approach is loadable as kernel module, it's the typical code pattern
> in the kernel. You can git grep for similar code.

socket filtering is available to unprivileged users.
So you're proposing to let them increment refcnt of modules?!
That's not secure.
You're also ignoring kernel address leak issue of xt_bpf
pointed out earlier. Userspace should not be able to see kernel
address inside 'struct xt_bpf_info'.
xtables need some sort of scrub_matchinfo callback to clean it
before passing back to user.

> This is extracted from one of your recent patches:
>
> void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
> {
> - atomic_sub(sk_filter_size(fp->len), &sk->sk_omem_alloc);
> + if (!fp->ebpf)
> + atomic_sub(sk_filter_size(fp->len), &sk->sk_omem_alloc);
> sk_filter_release(fp);
> }
>
> void sk_filter_charge(struct sock *sk, struct sk_filter *fp)
> {
> atomic_inc(&fp->refcnt);
> - atomic_add(sk_filter_size(fp->len), &sk->sk_omem_alloc);
> + if (!fp->ebpf)
> + atomic_add(sk_filter_size(fp->len), &sk->sk_omem_alloc);
> }
>
> Basically, that looks to me like two different socket filtering
> approach. You only have to define the struct sock_filter_cb, set the
> fields to point to the functions and register the approach in the
> socket filtering engine. That will allow a simple way to look up the
> filtering approach when loading the filter from userspace.

You're taking things out of context. Quoted code is from patch #9.
This rename is patch #0
Once it's all properly named it will look like:
void sk_filter_charge(struct sock *sk, struct bpf_prog *fp)
{
atomic_inc(&fp->refcnt);
if (!fp->ebpf)
atomic_add(bpf_filter_size(fp->len), &sk->sk_omem_alloc);
}

so it's one sk_filter_charge() function to deal with two variants of
bpf_prog (native ebpf and converted to ebpf).
In all cases the programs inside are in ebpf isa.
fp->ebpf flag means that program arrived from userspace as
native ebpf (and not was a result of conversion from classic).
So splitting this function in two would not make sense at all.

> By quick git grepping you already can find clients of this that do not
> need rcu / refcount: cls_bpf.c, net_cls.c, xt_bpf.c and

you can also see a lot of clients that don't use jit and 'work' field
is unused. That doesn't mean that these flags should be in
different structure.

> ptp_classifier. Moreover, I only see the refcnt bumped from
> sk_filter_charge(), I didn't find it neither in git nor in your
> patches.

in the patch #9 that you already quoted there is this part:

+/* called from sk_attach_filter_ebpf() or from tracing filter attach
+ * pairs with
+ * sk_detach_filter()->sk_filter_uncharge()->sk_filter_release()
+ * or with
+ * sk_unattached_filter_destroy()->sk_filter_release()
+ */
+struct sk_filter *bpf_prog_get(u32 ufd)
+{
+ struct fd f = fdget(ufd);
+ struct sk_filter *prog;
+
+ prog = get_prog(f);
+
+ if (IS_ERR(prog))
+ return prog;
+
+ atomic_inc(&prog->refcnt);
+ fdput(f);
+ return prog;
+}

see how 'native ebpf' reuses all existing structs?
Only renaming of 'struct sk_filter' and 'sk_filter_release()' is missing.
sk_filter_uncharge() name should stay as-is, since it is working
on 'struct sock*', whereas sk_filter_release() is working on bpf prog,
so should be renamed as well into 'bpf_prog_release()'

> I don't think rcu_head and refcnt are really part of the
> filter *in any case*, they just provide the way to link/unlink objects
> in a safe way in some situations.

what is a program then? a sequence of instructions only? if so,
what do you call a structure that carries refcnt, rcu, work and flags?
Using your logic task_struct should only have fields that describe
the task and all auxiliary fields should be moved to different struct?

> By renaming this, you're not fixing up things the semantics. It seems
> to me you just want to find a quick path to solve inconsistencies in
> your code.

Please point to inconsistencies.
It sounds to me that you're arguing only because you think that this
renaming will make it harder for you to add nft to socket filtering.
That is not the case. sk_filter_cb can be added later.

> Agreed, this looks just like messing around with naming to me.

guys, I don't see an alternative to renaming. All fields of 'struct sk_filter'
are used and needed to be part of bpf program.
Just look at 'bpf: expand BPF syscall with program load/unload' patch:
https://git.kernel.org/cgit/linux/kernel/git/ast/bpf.git/commit/?id=7a5b36ee1cb57e5fcb3e2414645dc9f5fdc3c404

There I blend 'native epbf' into 'struct sk_filter' like:
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -30,12 +30,17 @@ struct sock_fprog_kern {
struct sk_buff;
struct sock;
struct seccomp_data;
+struct bpf_prog_info;

struct sk_filter {
atomic_t refcnt;
u32 jited:1, /* Is our filter JIT'ed? */
- len:31; /* Number of filter blocks */
- struct sock_fprog_kern *orig_prog; /* Original BPF program */
+ ebpf:1, /* Is it eBPF program ? */
+ len:30; /* Number of filter blocks */
+ union {
+ struct sock_fprog_kern *orig_prog; /* Original
BPF program */
+ struct bpf_prog_info *info;
+ };
struct rcu_head rcu;
unsigned int (*bpf_func)(const struct sk_buff *skb,
const struct bpf_insn *filter);

where 'struct bpf_prog_info' carries additional info about bpf maps, etc:
that is ideal code reuse. All existing and new ebpf infra and structures
are common and shared. Only 'struct sk_filter' name doesn't make sense.

The alternative is to copy paste all of 'struct sk_filter' fields into new
structure ? You seriously think it's a better option?

I cannot see how the arguments about some future sk_filter_cb apply here.
When we get to the point of having multiple socket filtering engines,
we can add new 'struct sk_filter' and callbacks if necessary.
Today 'struct sk_filter' is all about bpf. Keep calling it something else
but 'bpf_prog' just denying the reality.

2014-07-29 01:16:24

by David Miller

[permalink] [raw]
Subject: Re: [PATCH net-next] net: filter: rename 'struct sk_filter' to 'struct bpf_prog'

From: Alexei Starovoitov <[email protected]>
Date: Mon, 28 Jul 2014 18:12:05 -0700

> On Mon, Jul 28, 2014 at 2:45 PM, Pablo Neira Ayuso <[email protected]> wrote:
>>> > struct sk_filter_cb {
>>> > int type;
>>> > struct module *me;
>>> > void (*charge)(struct sock *sk, struct sk_filter *fp);
>>> > void (*uncharge)(struct sock *sk, struct sk_filter *fp);
>>> > unsigned int (*run_filter)(struct sk_filter *fp, struct sk_buff *skb);
>>> > };
>>>
>>> Pablo,
>>>
>>> I don't think you understand the scope of BPF.
>>> 'struct module *'? to attach nft to sockets? ouch.
>>
>> The idea is that there will be one sk_filter_cb per socket filtering
>> approach. The structure module is just there in case one of the
>> approach is loadable as kernel module, it's the typical code pattern
>> in the kernel. You can git grep for similar code.
>
> socket filtering is available to unprivileged users.
> So you're proposing to let them increment refcnt of modules?!
> That's not secure.

It's impossible to avoid, and really is nothing new.

Users can open sockets, and that holds a reference to the module
implementing that protocol. Is that not secure too?

This discussion is degenerating into nonsense, please stop ignoring
Pablo's core points.

Thanks.