Adding support to call bpf_get_attach_cookie helper from
kprobe program attached by fprobe link.
The bpf_cookie is provided by array of u64 values, where
each value is paired with provided function address with
the same array index.
Suggested-by: Andrii Nakryiko <[email protected]>
Signed-off-by: Jiri Olsa <[email protected]>
---
include/linux/bpf.h | 2 +
include/uapi/linux/bpf.h | 1 +
kernel/bpf/syscall.c | 83 +++++++++++++++++++++++++++++++++-
kernel/trace/bpf_trace.c | 16 ++++++-
tools/include/uapi/linux/bpf.h | 1 +
5 files changed, 100 insertions(+), 3 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 6eb0b180d33b..7b65f05c0487 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1301,6 +1301,8 @@ static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
#endif
}
+u64 bpf_fprobe_cookie(struct bpf_run_ctx *ctx, u64 ip);
+
/* BPF program asks to bypass CAP_NET_BIND_SERVICE in bind. */
#define BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE (1 << 0)
/* BPF program asks to set CN on the packet. */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c0912f0a3dfe..0dc6aa4f9683 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1484,6 +1484,7 @@ union bpf_attr {
__aligned_u64 addrs;
__u32 cnt;
__u32 flags;
+ __aligned_u64 bpf_cookies;
} fprobe;
};
} link_create;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 0cfbb112c8e1..6c5e74bc43b6 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -33,6 +33,8 @@
#include <linux/rcupdate_trace.h>
#include <linux/memcontrol.h>
#include <linux/fprobe.h>
+#include <linux/bsearch.h>
+#include <linux/sort.h>
#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
(map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
@@ -3025,10 +3027,18 @@ static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *pro
#ifdef CONFIG_FPROBE
+struct bpf_fprobe_cookie {
+ unsigned long addr;
+ u64 bpf_cookie;
+};
+
struct bpf_fprobe_link {
struct bpf_link link;
struct fprobe fp;
unsigned long *addrs;
+ struct bpf_run_ctx run_ctx;
+ struct bpf_fprobe_cookie *bpf_cookies;
+ u32 cnt;
};
static void bpf_fprobe_link_release(struct bpf_link *link)
@@ -3045,6 +3055,7 @@ static void bpf_fprobe_link_dealloc(struct bpf_link *link)
fprobe_link = container_of(link, struct bpf_fprobe_link, link);
kfree(fprobe_link->addrs);
+ kfree(fprobe_link->bpf_cookies);
kfree(fprobe_link);
}
@@ -3053,9 +3064,37 @@ static const struct bpf_link_ops bpf_fprobe_link_lops = {
.dealloc = bpf_fprobe_link_dealloc,
};
+static int bpf_fprobe_cookie_cmp(const void *_a, const void *_b)
+{
+ const struct bpf_fprobe_cookie *a = _a;
+ const struct bpf_fprobe_cookie *b = _b;
+
+ if (a->addr == b->addr)
+ return 0;
+ return a->addr < b->addr ? -1 : 1;
+}
+
+u64 bpf_fprobe_cookie(struct bpf_run_ctx *ctx, u64 ip)
+{
+ struct bpf_fprobe_link *fprobe_link;
+ struct bpf_fprobe_cookie *val, key = {
+ .addr = (unsigned long) ip,
+ };
+
+ if (!ctx)
+ return 0;
+ fprobe_link = container_of(ctx, struct bpf_fprobe_link, run_ctx);
+ if (!fprobe_link->bpf_cookies)
+ return 0;
+ val = bsearch(&key, fprobe_link->bpf_cookies, fprobe_link->cnt,
+ sizeof(key), bpf_fprobe_cookie_cmp);
+ return val ? val->bpf_cookie : 0;
+}
+
static int fprobe_link_prog_run(struct bpf_fprobe_link *fprobe_link,
struct pt_regs *regs)
{
+ struct bpf_run_ctx *old_run_ctx;
int err;
if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
@@ -3063,12 +3102,16 @@ static int fprobe_link_prog_run(struct bpf_fprobe_link *fprobe_link,
goto out;
}
+ old_run_ctx = bpf_set_run_ctx(&fprobe_link->run_ctx);
+
rcu_read_lock();
migrate_disable();
err = bpf_prog_run(fprobe_link->link.prog, regs);
migrate_enable();
rcu_read_unlock();
+ bpf_reset_run_ctx(old_run_ctx);
+
out:
__this_cpu_dec(bpf_prog_active);
return err;
@@ -3161,10 +3204,12 @@ static int fprobe_resolve_syms(const void *usyms, u32 cnt,
static int bpf_fprobe_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
+ struct bpf_fprobe_cookie *bpf_cookies = NULL;
struct bpf_fprobe_link *link = NULL;
struct bpf_link_primer link_primer;
+ void __user *ubpf_cookies;
+ u32 flags, cnt, i, size;
unsigned long *addrs;
- u32 flags, cnt, size;
void __user *uaddrs;
void __user *usyms;
int err;
@@ -3205,6 +3250,37 @@ static int bpf_fprobe_link_attach(const union bpf_attr *attr, struct bpf_prog *p
goto error;
}
+ ubpf_cookies = u64_to_user_ptr(attr->link_create.fprobe.bpf_cookies);
+ if (ubpf_cookies) {
+ u64 *tmp;
+
+ err = -ENOMEM;
+ tmp = kzalloc(size, GFP_KERNEL);
+ if (!tmp)
+ goto error;
+
+ if (copy_from_user(tmp, ubpf_cookies, size)) {
+ kfree(tmp);
+ err = -EFAULT;
+ goto error;
+ }
+
+ size = cnt * sizeof(*bpf_cookies);
+ bpf_cookies = kzalloc(size, GFP_KERNEL);
+ if (!bpf_cookies) {
+ kfree(tmp);
+ goto error;
+ }
+
+ for (i = 0; i < cnt; i++) {
+ bpf_cookies[i].addr = addrs[i];
+ bpf_cookies[i].bpf_cookie = tmp[i];
+ }
+
+ sort(bpf_cookies, cnt, sizeof(*bpf_cookies), bpf_fprobe_cookie_cmp, NULL);
+ kfree(tmp);
+ }
+
link = kzalloc(sizeof(*link), GFP_KERNEL);
if (!link) {
err = -ENOMEM;
@@ -3224,6 +3300,8 @@ static int bpf_fprobe_link_attach(const union bpf_attr *attr, struct bpf_prog *p
link->fp.entry_handler = fprobe_link_entry_handler;
link->addrs = addrs;
+ link->bpf_cookies = bpf_cookies;
+ link->cnt = cnt;
err = register_fprobe_ips(&link->fp, addrs, cnt);
if (err) {
@@ -3236,6 +3314,7 @@ static int bpf_fprobe_link_attach(const union bpf_attr *attr, struct bpf_prog *p
error:
kfree(link);
kfree(addrs);
+ kfree(bpf_cookies);
return err;
}
#else /* !CONFIG_FPROBE */
@@ -4476,7 +4555,7 @@ static int tracing_bpf_link_attach(const union bpf_attr *attr, bpfptr_t uattr,
return -EINVAL;
}
-#define BPF_LINK_CREATE_LAST_FIELD link_create.fprobe.flags
+#define BPF_LINK_CREATE_LAST_FIELD link_create.fprobe.bpf_cookies
static int link_create(union bpf_attr *attr, bpfptr_t uattr)
{
enum bpf_prog_type ptype;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 28e59e31e3db..b54b2ef93928 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1049,6 +1049,18 @@ static const struct bpf_func_proto bpf_get_func_ip_proto_fprobe = {
.arg1_type = ARG_PTR_TO_CTX,
};
+BPF_CALL_1(bpf_get_attach_cookie_fprobe, struct pt_regs *, regs)
+{
+ return bpf_fprobe_cookie(current->bpf_ctx, regs->ip);
+}
+
+static const struct bpf_func_proto bpf_get_attach_cookie_proto_fprobe = {
+ .func = bpf_get_attach_cookie_fprobe,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
BPF_CALL_1(bpf_get_attach_cookie_trace, void *, ctx)
{
struct bpf_trace_run_ctx *run_ctx;
@@ -1295,7 +1307,9 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return prog->expected_attach_type == BPF_TRACE_FPROBE ?
&bpf_get_func_ip_proto_fprobe : &bpf_get_func_ip_proto_kprobe;
case BPF_FUNC_get_attach_cookie:
- return &bpf_get_attach_cookie_proto_trace;
+ return prog->expected_attach_type == BPF_TRACE_FPROBE ?
+ &bpf_get_attach_cookie_proto_fprobe :
+ &bpf_get_attach_cookie_proto_trace;
default:
return bpf_tracing_func_proto(func_id, prog);
}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index c0912f0a3dfe..0dc6aa4f9683 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1484,6 +1484,7 @@ union bpf_attr {
__aligned_u64 addrs;
__u32 cnt;
__u32 flags;
+ __aligned_u64 bpf_cookies;
} fprobe;
};
} link_create;
--
2.34.1
On Wed, Feb 2, 2022 at 5:54 AM Jiri Olsa <[email protected]> wrote:
>
> Adding support to call bpf_get_attach_cookie helper from
> kprobe program attached by fprobe link.
>
> The bpf_cookie is provided by array of u64 values, where
> each value is paired with provided function address with
> the same array index.
>
> Suggested-by: Andrii Nakryiko <[email protected]>
> Signed-off-by: Jiri Olsa <[email protected]>
> ---
> include/linux/bpf.h | 2 +
> include/uapi/linux/bpf.h | 1 +
> kernel/bpf/syscall.c | 83 +++++++++++++++++++++++++++++++++-
> kernel/trace/bpf_trace.c | 16 ++++++-
> tools/include/uapi/linux/bpf.h | 1 +
> 5 files changed, 100 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 6eb0b180d33b..7b65f05c0487 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1301,6 +1301,8 @@ static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
> #endif
> }
>
> +u64 bpf_fprobe_cookie(struct bpf_run_ctx *ctx, u64 ip);
> +
> /* BPF program asks to bypass CAP_NET_BIND_SERVICE in bind. */
> #define BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE (1 << 0)
> /* BPF program asks to set CN on the packet. */
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index c0912f0a3dfe..0dc6aa4f9683 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -1484,6 +1484,7 @@ union bpf_attr {
> __aligned_u64 addrs;
> __u32 cnt;
> __u32 flags;
> + __aligned_u64 bpf_cookies;
maybe put it right after addrs, they are closely related and cnt
describes all of syms/addrs/cookies.
> } fprobe;
> };
> } link_create;
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 0cfbb112c8e1..6c5e74bc43b6 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -33,6 +33,8 @@
> #include <linux/rcupdate_trace.h>
> #include <linux/memcontrol.h>
> #include <linux/fprobe.h>
> +#include <linux/bsearch.h>
> +#include <linux/sort.h>
>
> #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
> (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
> @@ -3025,10 +3027,18 @@ static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *pro
>
> #ifdef CONFIG_FPROBE
>
> +struct bpf_fprobe_cookie {
> + unsigned long addr;
> + u64 bpf_cookie;
> +};
> +
> struct bpf_fprobe_link {
> struct bpf_link link;
> struct fprobe fp;
> unsigned long *addrs;
> + struct bpf_run_ctx run_ctx;
> + struct bpf_fprobe_cookie *bpf_cookies;
you already have all the addrs above, why keeping a second copy of
each addrs in bpf_fprobe_cookie. Let's have two arrays: addrs
(unsigned long) and cookies (u64) and make sure that they are sorted
together. Then lookup addrs, calculate index, use that index to fetch
cookie.
Seems like sort_r() provides exactly the interface you'd need to do
this very easily. Having addrs separate from cookies also a bit
advantageous in terms of TLB misses (if you need any more persuasion
;)
> + u32 cnt;
> };
>
> static void bpf_fprobe_link_release(struct bpf_link *link)
> @@ -3045,6 +3055,7 @@ static void bpf_fprobe_link_dealloc(struct bpf_link *link)
>
> fprobe_link = container_of(link, struct bpf_fprobe_link, link);
> kfree(fprobe_link->addrs);
> + kfree(fprobe_link->bpf_cookies);
> kfree(fprobe_link);
> }
>
> @@ -3053,9 +3064,37 @@ static const struct bpf_link_ops bpf_fprobe_link_lops = {
> .dealloc = bpf_fprobe_link_dealloc,
> };
>
> +static int bpf_fprobe_cookie_cmp(const void *_a, const void *_b)
> +{
> + const struct bpf_fprobe_cookie *a = _a;
> + const struct bpf_fprobe_cookie *b = _b;
> +
> + if (a->addr == b->addr)
> + return 0;
> + return a->addr < b->addr ? -1 : 1;
> +}
> +
> +u64 bpf_fprobe_cookie(struct bpf_run_ctx *ctx, u64 ip)
> +{
> + struct bpf_fprobe_link *fprobe_link;
> + struct bpf_fprobe_cookie *val, key = {
> + .addr = (unsigned long) ip,
> + };
> +
> + if (!ctx)
> + return 0;
is it allowed to have ctx == NULL?
> + fprobe_link = container_of(ctx, struct bpf_fprobe_link, run_ctx);
> + if (!fprobe_link->bpf_cookies)
> + return 0;
> + val = bsearch(&key, fprobe_link->bpf_cookies, fprobe_link->cnt,
> + sizeof(key), bpf_fprobe_cookie_cmp);
> + return val ? val->bpf_cookie : 0;
> +}
> +
> static int fprobe_link_prog_run(struct bpf_fprobe_link *fprobe_link,
> struct pt_regs *regs)
> {
> + struct bpf_run_ctx *old_run_ctx;
> int err;
>
> if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
> @@ -3063,12 +3102,16 @@ static int fprobe_link_prog_run(struct bpf_fprobe_link *fprobe_link,
> goto out;
> }
>
> + old_run_ctx = bpf_set_run_ctx(&fprobe_link->run_ctx);
> +
> rcu_read_lock();
> migrate_disable();
> err = bpf_prog_run(fprobe_link->link.prog, regs);
> migrate_enable();
> rcu_read_unlock();
>
> + bpf_reset_run_ctx(old_run_ctx);
> +
> out:
> __this_cpu_dec(bpf_prog_active);
> return err;
> @@ -3161,10 +3204,12 @@ static int fprobe_resolve_syms(const void *usyms, u32 cnt,
>
> static int bpf_fprobe_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
> {
> + struct bpf_fprobe_cookie *bpf_cookies = NULL;
> struct bpf_fprobe_link *link = NULL;
> struct bpf_link_primer link_primer;
> + void __user *ubpf_cookies;
> + u32 flags, cnt, i, size;
> unsigned long *addrs;
> - u32 flags, cnt, size;
> void __user *uaddrs;
> void __user *usyms;
> int err;
> @@ -3205,6 +3250,37 @@ static int bpf_fprobe_link_attach(const union bpf_attr *attr, struct bpf_prog *p
> goto error;
> }
>
> + ubpf_cookies = u64_to_user_ptr(attr->link_create.fprobe.bpf_cookies);
nit: let's call all this "cookies", this bpf_ prefix feels a bit
redundant (I know about perf_event.bpf_cookie, but still).
> + if (ubpf_cookies) {
> + u64 *tmp;
> +
> + err = -ENOMEM;
> + tmp = kzalloc(size, GFP_KERNEL);
kvmalloc?
> + if (!tmp)
> + goto error;
> +
> + if (copy_from_user(tmp, ubpf_cookies, size)) {
> + kfree(tmp);
> + err = -EFAULT;
> + goto error;
> + }
> +
[...]
On Mon, Feb 07, 2022 at 10:59:21AM -0800, Andrii Nakryiko wrote:
> On Wed, Feb 2, 2022 at 5:54 AM Jiri Olsa <[email protected]> wrote:
> >
> > Adding support to call bpf_get_attach_cookie helper from
> > kprobe program attached by fprobe link.
> >
> > The bpf_cookie is provided by array of u64 values, where
> > each value is paired with provided function address with
> > the same array index.
> >
> > Suggested-by: Andrii Nakryiko <[email protected]>
> > Signed-off-by: Jiri Olsa <[email protected]>
> > ---
> > include/linux/bpf.h | 2 +
> > include/uapi/linux/bpf.h | 1 +
> > kernel/bpf/syscall.c | 83 +++++++++++++++++++++++++++++++++-
> > kernel/trace/bpf_trace.c | 16 ++++++-
> > tools/include/uapi/linux/bpf.h | 1 +
> > 5 files changed, 100 insertions(+), 3 deletions(-)
> >
> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > index 6eb0b180d33b..7b65f05c0487 100644
> > --- a/include/linux/bpf.h
> > +++ b/include/linux/bpf.h
> > @@ -1301,6 +1301,8 @@ static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
> > #endif
> > }
> >
> > +u64 bpf_fprobe_cookie(struct bpf_run_ctx *ctx, u64 ip);
> > +
> > /* BPF program asks to bypass CAP_NET_BIND_SERVICE in bind. */
> > #define BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE (1 << 0)
> > /* BPF program asks to set CN on the packet. */
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index c0912f0a3dfe..0dc6aa4f9683 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -1484,6 +1484,7 @@ union bpf_attr {
> > __aligned_u64 addrs;
> > __u32 cnt;
> > __u32 flags;
> > + __aligned_u64 bpf_cookies;
>
> maybe put it right after addrs, they are closely related and cnt
> describes all of syms/addrs/cookies.
ok
>
> > } fprobe;
> > };
> > } link_create;
> > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> > index 0cfbb112c8e1..6c5e74bc43b6 100644
> > --- a/kernel/bpf/syscall.c
> > +++ b/kernel/bpf/syscall.c
> > @@ -33,6 +33,8 @@
> > #include <linux/rcupdate_trace.h>
> > #include <linux/memcontrol.h>
> > #include <linux/fprobe.h>
> > +#include <linux/bsearch.h>
> > +#include <linux/sort.h>
> >
> > #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
> > (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
> > @@ -3025,10 +3027,18 @@ static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *pro
> >
> > #ifdef CONFIG_FPROBE
> >
> > +struct bpf_fprobe_cookie {
> > + unsigned long addr;
> > + u64 bpf_cookie;
> > +};
> > +
> > struct bpf_fprobe_link {
> > struct bpf_link link;
> > struct fprobe fp;
> > unsigned long *addrs;
> > + struct bpf_run_ctx run_ctx;
> > + struct bpf_fprobe_cookie *bpf_cookies;
>
> you already have all the addrs above, why keeping a second copy of
> each addrs in bpf_fprobe_cookie. Let's have two arrays: addrs
> (unsigned long) and cookies (u64) and make sure that they are sorted
> together. Then lookup addrs, calculate index, use that index to fetch
> cookie.
>
> Seems like sort_r() provides exactly the interface you'd need to do
> this very easily. Having addrs separate from cookies also a bit
> advantageous in terms of TLB misses (if you need any more persuasion
> ;)
no persuation needed, I actually tried that but it turned out sort_r
is not ready yet ;-)
because you can't pass priv pointer to the swap callback, so we can't
swap the other array.. I did a change to allow that, but it's not trivial
and will need some bigger testing/review because the original sort
calls sort_r, and of course there are many 'sort' users ;-)
>
> > + u32 cnt;
> > };
> >
> > static void bpf_fprobe_link_release(struct bpf_link *link)
> > @@ -3045,6 +3055,7 @@ static void bpf_fprobe_link_dealloc(struct bpf_link *link)
> >
> > fprobe_link = container_of(link, struct bpf_fprobe_link, link);
> > kfree(fprobe_link->addrs);
> > + kfree(fprobe_link->bpf_cookies);
> > kfree(fprobe_link);
> > }
> >
> > @@ -3053,9 +3064,37 @@ static const struct bpf_link_ops bpf_fprobe_link_lops = {
> > .dealloc = bpf_fprobe_link_dealloc,
> > };
> >
> > +static int bpf_fprobe_cookie_cmp(const void *_a, const void *_b)
> > +{
> > + const struct bpf_fprobe_cookie *a = _a;
> > + const struct bpf_fprobe_cookie *b = _b;
> > +
> > + if (a->addr == b->addr)
> > + return 0;
> > + return a->addr < b->addr ? -1 : 1;
> > +}
> > +
> > +u64 bpf_fprobe_cookie(struct bpf_run_ctx *ctx, u64 ip)
> > +{
> > + struct bpf_fprobe_link *fprobe_link;
> > + struct bpf_fprobe_cookie *val, key = {
> > + .addr = (unsigned long) ip,
> > + };
> > +
> > + if (!ctx)
> > + return 0;
>
> is it allowed to have ctx == NULL?
nope, I was also thinking this is more 'WARN_ON[_ONCE]' check
>
> > + fprobe_link = container_of(ctx, struct bpf_fprobe_link, run_ctx);
> > + if (!fprobe_link->bpf_cookies)
> > + return 0;
> > + val = bsearch(&key, fprobe_link->bpf_cookies, fprobe_link->cnt,
> > + sizeof(key), bpf_fprobe_cookie_cmp);
> > + return val ? val->bpf_cookie : 0;
> > +}
> > +
> > static int fprobe_link_prog_run(struct bpf_fprobe_link *fprobe_link,
> > struct pt_regs *regs)
> > {
> > + struct bpf_run_ctx *old_run_ctx;
> > int err;
> >
> > if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
> > @@ -3063,12 +3102,16 @@ static int fprobe_link_prog_run(struct bpf_fprobe_link *fprobe_link,
> > goto out;
> > }
> >
> > + old_run_ctx = bpf_set_run_ctx(&fprobe_link->run_ctx);
> > +
> > rcu_read_lock();
> > migrate_disable();
> > err = bpf_prog_run(fprobe_link->link.prog, regs);
> > migrate_enable();
> > rcu_read_unlock();
> >
> > + bpf_reset_run_ctx(old_run_ctx);
> > +
> > out:
> > __this_cpu_dec(bpf_prog_active);
> > return err;
> > @@ -3161,10 +3204,12 @@ static int fprobe_resolve_syms(const void *usyms, u32 cnt,
> >
> > static int bpf_fprobe_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
> > {
> > + struct bpf_fprobe_cookie *bpf_cookies = NULL;
> > struct bpf_fprobe_link *link = NULL;
> > struct bpf_link_primer link_primer;
> > + void __user *ubpf_cookies;
> > + u32 flags, cnt, i, size;
> > unsigned long *addrs;
> > - u32 flags, cnt, size;
> > void __user *uaddrs;
> > void __user *usyms;
> > int err;
> > @@ -3205,6 +3250,37 @@ static int bpf_fprobe_link_attach(const union bpf_attr *attr, struct bpf_prog *p
> > goto error;
> > }
> >
> > + ubpf_cookies = u64_to_user_ptr(attr->link_create.fprobe.bpf_cookies);
>
> nit: let's call all this "cookies", this bpf_ prefix feels a bit
> redundant (I know about perf_event.bpf_cookie, but still).
ok
>
> > + if (ubpf_cookies) {
> > + u64 *tmp;
> > +
> > + err = -ENOMEM;
> > + tmp = kzalloc(size, GFP_KERNEL);
>
> kvmalloc?
ok
thanks,
jirka
On Tue, Feb 8, 2022 at 3:46 PM Jiri Olsa <[email protected]> wrote:
>
> On Tue, Feb 08, 2022 at 03:35:24PM -0800, Andrii Nakryiko wrote:
> > On Tue, Feb 8, 2022 at 1:07 AM Jiri Olsa <[email protected]> wrote:
> > >
> > > On Mon, Feb 07, 2022 at 10:59:21AM -0800, Andrii Nakryiko wrote:
> > > > On Wed, Feb 2, 2022 at 5:54 AM Jiri Olsa <[email protected]> wrote:
> > > > >
> > > > > Adding support to call bpf_get_attach_cookie helper from
> > > > > kprobe program attached by fprobe link.
> > > > >
> > > > > The bpf_cookie is provided by array of u64 values, where
> > > > > each value is paired with provided function address with
> > > > > the same array index.
> > > > >
> > > > > Suggested-by: Andrii Nakryiko <[email protected]>
> > > > > Signed-off-by: Jiri Olsa <[email protected]>
> > > > > ---
> > > > > include/linux/bpf.h | 2 +
> > > > > include/uapi/linux/bpf.h | 1 +
> > > > > kernel/bpf/syscall.c | 83 +++++++++++++++++++++++++++++++++-
> > > > > kernel/trace/bpf_trace.c | 16 ++++++-
> > > > > tools/include/uapi/linux/bpf.h | 1 +
> > > > > 5 files changed, 100 insertions(+), 3 deletions(-)
> > > > >
> > > > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > > > > index 6eb0b180d33b..7b65f05c0487 100644
> > > > > --- a/include/linux/bpf.h
> > > > > +++ b/include/linux/bpf.h
> > > > > @@ -1301,6 +1301,8 @@ static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
> > > > > #endif
> > > > > }
> > > > >
> > > > > +u64 bpf_fprobe_cookie(struct bpf_run_ctx *ctx, u64 ip);
> > > > > +
> > > > > /* BPF program asks to bypass CAP_NET_BIND_SERVICE in bind. */
> > > > > #define BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE (1 << 0)
> > > > > /* BPF program asks to set CN on the packet. */
> > > > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > > > > index c0912f0a3dfe..0dc6aa4f9683 100644
> > > > > --- a/include/uapi/linux/bpf.h
> > > > > +++ b/include/uapi/linux/bpf.h
> > > > > @@ -1484,6 +1484,7 @@ union bpf_attr {
> > > > > __aligned_u64 addrs;
> > > > > __u32 cnt;
> > > > > __u32 flags;
> > > > > + __aligned_u64 bpf_cookies;
> > > >
> > > > maybe put it right after addrs, they are closely related and cnt
> > > > describes all of syms/addrs/cookies.
> > >
> > > ok
> > >
> > > >
> > > > > } fprobe;
> > > > > };
> > > > > } link_create;
> > > > > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> > > > > index 0cfbb112c8e1..6c5e74bc43b6 100644
> > > > > --- a/kernel/bpf/syscall.c
> > > > > +++ b/kernel/bpf/syscall.c
> > > > > @@ -33,6 +33,8 @@
> > > > > #include <linux/rcupdate_trace.h>
> > > > > #include <linux/memcontrol.h>
> > > > > #include <linux/fprobe.h>
> > > > > +#include <linux/bsearch.h>
> > > > > +#include <linux/sort.h>
> > > > >
> > > > > #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
> > > > > (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
> > > > > @@ -3025,10 +3027,18 @@ static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *pro
> > > > >
> > > > > #ifdef CONFIG_FPROBE
> > > > >
> > > > > +struct bpf_fprobe_cookie {
> > > > > + unsigned long addr;
> > > > > + u64 bpf_cookie;
> > > > > +};
> > > > > +
> > > > > struct bpf_fprobe_link {
> > > > > struct bpf_link link;
> > > > > struct fprobe fp;
> > > > > unsigned long *addrs;
> > > > > + struct bpf_run_ctx run_ctx;
> > > > > + struct bpf_fprobe_cookie *bpf_cookies;
> > > >
> > > > you already have all the addrs above, why keeping a second copy of
> > > > each addrs in bpf_fprobe_cookie. Let's have two arrays: addrs
> > > > (unsigned long) and cookies (u64) and make sure that they are sorted
> > > > together. Then lookup addrs, calculate index, use that index to fetch
> > > > cookie.
> > > >
> > > > Seems like sort_r() provides exactly the interface you'd need to do
> > > > this very easily. Having addrs separate from cookies also a bit
> > > > advantageous in terms of TLB misses (if you need any more persuasion
> > > > ;)
> > >
> > > no persuation needed, I actually tried that but it turned out sort_r
> > > is not ready yet ;-)
> > >
> > > because you can't pass priv pointer to the swap callback, so we can't
> > > swap the other array.. I did a change to allow that, but it's not trivial
> > > and will need some bigger testing/review because the original sort
> > > calls sort_r, and of course there are many 'sort' users ;-)
> >
> > Big sigh... :( Did you do something similar to _CMP_WRAPPER? You don't
> > need to change the interface of sort(), so it shouldn't require
> > extensive code refactoring. You'll just need to adjust priv to be not
> > just cmp_func, but cmp_func + swap_fun (need a small struct on the
> > stack in sort, probably). Or you did something else?
>
> I ended up with change below
>
exactly what I had in mind
> jirka
>
>
> ---
> include/linux/sort.h | 2 +-
> include/linux/types.h | 1 +
> lib/sort.c | 44 +++++++++++++++++++++++++++++++++----------
> 3 files changed, 36 insertions(+), 11 deletions(-)
>
> diff --git a/include/linux/sort.h b/include/linux/sort.h
> index b5898725fe9d..e163287ac6c1 100644
> --- a/include/linux/sort.h
> +++ b/include/linux/sort.h
> @@ -6,7 +6,7 @@
>
> void sort_r(void *base, size_t num, size_t size,
> cmp_r_func_t cmp_func,
> - swap_func_t swap_func,
> + swap_r_func_t swap_func,
> const void *priv);
>
> void sort(void *base, size_t num, size_t size,
> diff --git a/include/linux/types.h b/include/linux/types.h
> index ac825ad90e44..ea8cf60a8a79 100644
> --- a/include/linux/types.h
> +++ b/include/linux/types.h
> @@ -226,6 +226,7 @@ struct callback_head {
> typedef void (*rcu_callback_t)(struct rcu_head *head);
> typedef void (*call_rcu_func_t)(struct rcu_head *head, rcu_callback_t func);
>
> +typedef void (*swap_r_func_t)(void *a, void *b, int size, const void *priv);
> typedef void (*swap_func_t)(void *a, void *b, int size);
>
> typedef int (*cmp_r_func_t)(const void *a, const void *b, const void *priv);
> diff --git a/lib/sort.c b/lib/sort.c
> index aa18153864d2..f65078608c16 100644
> --- a/lib/sort.c
> +++ b/lib/sort.c
> @@ -122,16 +122,29 @@ static void swap_bytes(void *a, void *b, size_t n)
> * a pointer, but small integers make for the smallest compare
> * instructions.
> */
> -#define SWAP_WORDS_64 (swap_func_t)0
> -#define SWAP_WORDS_32 (swap_func_t)1
> -#define SWAP_BYTES (swap_func_t)2
> +#define SWAP_WORDS_64 (swap_r_func_t)0
> +#define SWAP_WORDS_32 (swap_r_func_t)1
> +#define SWAP_BYTES (swap_r_func_t)2
> +#define SWAP_WRAPPER (swap_r_func_t)3
> +
> +struct wrapper {
> + cmp_func_t cmp;
> + swap_func_t swap;
> +};
>
> /*
> * The function pointer is last to make tail calls most efficient if the
> * compiler decides not to inline this function.
> */
> -static void do_swap(void *a, void *b, size_t size, swap_func_t swap_func)
> +static void do_swap(void *a, void *b, size_t size, swap_r_func_t swap_func, const void *priv)
> {
> + const struct wrapper *w = priv;
I'd just move this under if
> +
> + if (swap_func == SWAP_WRAPPER) {
const struct wrapper *w = priv; here
> + w->swap(a, b, (int)size);
> + return;
> + }
> +
> if (swap_func == SWAP_WORDS_64)
> swap_words_64(a, b, size);
> else if (swap_func == SWAP_WORDS_32)
> @@ -139,15 +152,17 @@ static void do_swap(void *a, void *b, size_t size, swap_func_t swap_func)
> else if (swap_func == SWAP_BYTES)
> swap_bytes(a, b, size);
> else
> - swap_func(a, b, (int)size);
> + swap_func(a, b, (int)size, priv);
> }
>
> #define _CMP_WRAPPER ((cmp_r_func_t)0L)
>
> static int do_cmp(const void *a, const void *b, cmp_r_func_t cmp, const void *priv)
> {
> + const struct wrapper *w = priv;
> +
> if (cmp == _CMP_WRAPPER)
> - return ((cmp_func_t)(priv))(a, b);
> + return w->cmp(a, b);
same here, or just stick to the previous style with
return ((const struct wrapper *)priv)->cmd(a, b);
> return cmp(a, b, priv);
> }
>
> @@ -198,16 +213,20 @@ static size_t parent(size_t i, unsigned int lsbit, size_t size)
> */
> void sort_r(void *base, size_t num, size_t size,
> cmp_r_func_t cmp_func,
> - swap_func_t swap_func,
> + swap_r_func_t swap_func,
> const void *priv)
> {
> /* pre-scale counters for performance */
> size_t n = num * size, a = (num/2) * size;
> const unsigned int lsbit = size & -size; /* Used to find parent */
> + const struct wrapper *w = priv;
>
> if (!a) /* num < 2 || size == 0 */
> return;
>
> + if (swap_func == SWAP_WRAPPER && !w->swap)
same here, I'd probably do the cast right here to keep this wrapper
stuff as local as possible
> + swap_func = NULL;
> +
> if (!swap_func) {
> if (is_aligned(base, size, 8))
> swap_func = SWAP_WORDS_64;
> @@ -230,7 +249,7 @@ void sort_r(void *base, size_t num, size_t size,
> if (a) /* Building heap: sift down --a */
> a -= size;
> else if (n -= size) /* Sorting: Extract root to --n */
> - do_swap(base, base + n, size, swap_func);
> + do_swap(base, base + n, size, swap_func, priv);
> else /* Sort complete */
> break;
>
> @@ -257,7 +276,7 @@ void sort_r(void *base, size_t num, size_t size,
> c = b; /* Where "a" belongs */
> while (b != a) { /* Shift it into place */
> b = parent(b, lsbit, size);
> - do_swap(base + b, base + c, size, swap_func);
> + do_swap(base + b, base + c, size, swap_func, priv);
> }
> }
> }
> @@ -267,6 +286,11 @@ void sort(void *base, size_t num, size_t size,
> cmp_func_t cmp_func,
> swap_func_t swap_func)
> {
> - return sort_r(base, num, size, _CMP_WRAPPER, swap_func, cmp_func);
> + struct wrapper w = {
> + .cmp = cmp_func,
> + .swap = swap_func,
> + };
> +
> + return sort_r(base, num, size, _CMP_WRAPPER, SWAP_WRAPPER, &w);
> }
> EXPORT_SYMBOL(sort);
> --
> 2.34.1
>
On Tue, Feb 08, 2022 at 03:35:24PM -0800, Andrii Nakryiko wrote:
> On Tue, Feb 8, 2022 at 1:07 AM Jiri Olsa <[email protected]> wrote:
> >
> > On Mon, Feb 07, 2022 at 10:59:21AM -0800, Andrii Nakryiko wrote:
> > > On Wed, Feb 2, 2022 at 5:54 AM Jiri Olsa <[email protected]> wrote:
> > > >
> > > > Adding support to call bpf_get_attach_cookie helper from
> > > > kprobe program attached by fprobe link.
> > > >
> > > > The bpf_cookie is provided by array of u64 values, where
> > > > each value is paired with provided function address with
> > > > the same array index.
> > > >
> > > > Suggested-by: Andrii Nakryiko <[email protected]>
> > > > Signed-off-by: Jiri Olsa <[email protected]>
> > > > ---
> > > > include/linux/bpf.h | 2 +
> > > > include/uapi/linux/bpf.h | 1 +
> > > > kernel/bpf/syscall.c | 83 +++++++++++++++++++++++++++++++++-
> > > > kernel/trace/bpf_trace.c | 16 ++++++-
> > > > tools/include/uapi/linux/bpf.h | 1 +
> > > > 5 files changed, 100 insertions(+), 3 deletions(-)
> > > >
> > > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > > > index 6eb0b180d33b..7b65f05c0487 100644
> > > > --- a/include/linux/bpf.h
> > > > +++ b/include/linux/bpf.h
> > > > @@ -1301,6 +1301,8 @@ static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
> > > > #endif
> > > > }
> > > >
> > > > +u64 bpf_fprobe_cookie(struct bpf_run_ctx *ctx, u64 ip);
> > > > +
> > > > /* BPF program asks to bypass CAP_NET_BIND_SERVICE in bind. */
> > > > #define BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE (1 << 0)
> > > > /* BPF program asks to set CN on the packet. */
> > > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > > > index c0912f0a3dfe..0dc6aa4f9683 100644
> > > > --- a/include/uapi/linux/bpf.h
> > > > +++ b/include/uapi/linux/bpf.h
> > > > @@ -1484,6 +1484,7 @@ union bpf_attr {
> > > > __aligned_u64 addrs;
> > > > __u32 cnt;
> > > > __u32 flags;
> > > > + __aligned_u64 bpf_cookies;
> > >
> > > maybe put it right after addrs, they are closely related and cnt
> > > describes all of syms/addrs/cookies.
> >
> > ok
> >
> > >
> > > > } fprobe;
> > > > };
> > > > } link_create;
> > > > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> > > > index 0cfbb112c8e1..6c5e74bc43b6 100644
> > > > --- a/kernel/bpf/syscall.c
> > > > +++ b/kernel/bpf/syscall.c
> > > > @@ -33,6 +33,8 @@
> > > > #include <linux/rcupdate_trace.h>
> > > > #include <linux/memcontrol.h>
> > > > #include <linux/fprobe.h>
> > > > +#include <linux/bsearch.h>
> > > > +#include <linux/sort.h>
> > > >
> > > > #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
> > > > (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
> > > > @@ -3025,10 +3027,18 @@ static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *pro
> > > >
> > > > #ifdef CONFIG_FPROBE
> > > >
> > > > +struct bpf_fprobe_cookie {
> > > > + unsigned long addr;
> > > > + u64 bpf_cookie;
> > > > +};
> > > > +
> > > > struct bpf_fprobe_link {
> > > > struct bpf_link link;
> > > > struct fprobe fp;
> > > > unsigned long *addrs;
> > > > + struct bpf_run_ctx run_ctx;
> > > > + struct bpf_fprobe_cookie *bpf_cookies;
> > >
> > > you already have all the addrs above, why keeping a second copy of
> > > each addrs in bpf_fprobe_cookie. Let's have two arrays: addrs
> > > (unsigned long) and cookies (u64) and make sure that they are sorted
> > > together. Then lookup addrs, calculate index, use that index to fetch
> > > cookie.
> > >
> > > Seems like sort_r() provides exactly the interface you'd need to do
> > > this very easily. Having addrs separate from cookies also a bit
> > > advantageous in terms of TLB misses (if you need any more persuasion
> > > ;)
> >
> > no persuation needed, I actually tried that but it turned out sort_r
> > is not ready yet ;-)
> >
> > because you can't pass priv pointer to the swap callback, so we can't
> > swap the other array.. I did a change to allow that, but it's not trivial
> > and will need some bigger testing/review because the original sort
> > calls sort_r, and of course there are many 'sort' users ;-)
>
> Big sigh... :( Did you do something similar to _CMP_WRAPPER? You don't
> need to change the interface of sort(), so it shouldn't require
> extensive code refactoring. You'll just need to adjust priv to be not
> just cmp_func, but cmp_func + swap_fun (need a small struct on the
> stack in sort, probably). Or you did something else?
I ended up with change below
jirka
---
include/linux/sort.h | 2 +-
include/linux/types.h | 1 +
lib/sort.c | 44 +++++++++++++++++++++++++++++++++----------
3 files changed, 36 insertions(+), 11 deletions(-)
diff --git a/include/linux/sort.h b/include/linux/sort.h
index b5898725fe9d..e163287ac6c1 100644
--- a/include/linux/sort.h
+++ b/include/linux/sort.h
@@ -6,7 +6,7 @@
void sort_r(void *base, size_t num, size_t size,
cmp_r_func_t cmp_func,
- swap_func_t swap_func,
+ swap_r_func_t swap_func,
const void *priv);
void sort(void *base, size_t num, size_t size,
diff --git a/include/linux/types.h b/include/linux/types.h
index ac825ad90e44..ea8cf60a8a79 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -226,6 +226,7 @@ struct callback_head {
typedef void (*rcu_callback_t)(struct rcu_head *head);
typedef void (*call_rcu_func_t)(struct rcu_head *head, rcu_callback_t func);
+typedef void (*swap_r_func_t)(void *a, void *b, int size, const void *priv);
typedef void (*swap_func_t)(void *a, void *b, int size);
typedef int (*cmp_r_func_t)(const void *a, const void *b, const void *priv);
diff --git a/lib/sort.c b/lib/sort.c
index aa18153864d2..f65078608c16 100644
--- a/lib/sort.c
+++ b/lib/sort.c
@@ -122,16 +122,29 @@ static void swap_bytes(void *a, void *b, size_t n)
* a pointer, but small integers make for the smallest compare
* instructions.
*/
-#define SWAP_WORDS_64 (swap_func_t)0
-#define SWAP_WORDS_32 (swap_func_t)1
-#define SWAP_BYTES (swap_func_t)2
+#define SWAP_WORDS_64 (swap_r_func_t)0
+#define SWAP_WORDS_32 (swap_r_func_t)1
+#define SWAP_BYTES (swap_r_func_t)2
+#define SWAP_WRAPPER (swap_r_func_t)3
+
+struct wrapper {
+ cmp_func_t cmp;
+ swap_func_t swap;
+};
/*
* The function pointer is last to make tail calls most efficient if the
* compiler decides not to inline this function.
*/
-static void do_swap(void *a, void *b, size_t size, swap_func_t swap_func)
+static void do_swap(void *a, void *b, size_t size, swap_r_func_t swap_func, const void *priv)
{
+ const struct wrapper *w = priv;
+
+ if (swap_func == SWAP_WRAPPER) {
+ w->swap(a, b, (int)size);
+ return;
+ }
+
if (swap_func == SWAP_WORDS_64)
swap_words_64(a, b, size);
else if (swap_func == SWAP_WORDS_32)
@@ -139,15 +152,17 @@ static void do_swap(void *a, void *b, size_t size, swap_func_t swap_func)
else if (swap_func == SWAP_BYTES)
swap_bytes(a, b, size);
else
- swap_func(a, b, (int)size);
+ swap_func(a, b, (int)size, priv);
}
#define _CMP_WRAPPER ((cmp_r_func_t)0L)
static int do_cmp(const void *a, const void *b, cmp_r_func_t cmp, const void *priv)
{
+ const struct wrapper *w = priv;
+
if (cmp == _CMP_WRAPPER)
- return ((cmp_func_t)(priv))(a, b);
+ return w->cmp(a, b);
return cmp(a, b, priv);
}
@@ -198,16 +213,20 @@ static size_t parent(size_t i, unsigned int lsbit, size_t size)
*/
void sort_r(void *base, size_t num, size_t size,
cmp_r_func_t cmp_func,
- swap_func_t swap_func,
+ swap_r_func_t swap_func,
const void *priv)
{
/* pre-scale counters for performance */
size_t n = num * size, a = (num/2) * size;
const unsigned int lsbit = size & -size; /* Used to find parent */
+ const struct wrapper *w = priv;
if (!a) /* num < 2 || size == 0 */
return;
+ if (swap_func == SWAP_WRAPPER && !w->swap)
+ swap_func = NULL;
+
if (!swap_func) {
if (is_aligned(base, size, 8))
swap_func = SWAP_WORDS_64;
@@ -230,7 +249,7 @@ void sort_r(void *base, size_t num, size_t size,
if (a) /* Building heap: sift down --a */
a -= size;
else if (n -= size) /* Sorting: Extract root to --n */
- do_swap(base, base + n, size, swap_func);
+ do_swap(base, base + n, size, swap_func, priv);
else /* Sort complete */
break;
@@ -257,7 +276,7 @@ void sort_r(void *base, size_t num, size_t size,
c = b; /* Where "a" belongs */
while (b != a) { /* Shift it into place */
b = parent(b, lsbit, size);
- do_swap(base + b, base + c, size, swap_func);
+ do_swap(base + b, base + c, size, swap_func, priv);
}
}
}
@@ -267,6 +286,11 @@ void sort(void *base, size_t num, size_t size,
cmp_func_t cmp_func,
swap_func_t swap_func)
{
- return sort_r(base, num, size, _CMP_WRAPPER, swap_func, cmp_func);
+ struct wrapper w = {
+ .cmp = cmp_func,
+ .swap = swap_func,
+ };
+
+ return sort_r(base, num, size, _CMP_WRAPPER, SWAP_WRAPPER, &w);
}
EXPORT_SYMBOL(sort);
--
2.34.1
On Tue, Feb 8, 2022 at 1:07 AM Jiri Olsa <[email protected]> wrote:
>
> On Mon, Feb 07, 2022 at 10:59:21AM -0800, Andrii Nakryiko wrote:
> > On Wed, Feb 2, 2022 at 5:54 AM Jiri Olsa <[email protected]> wrote:
> > >
> > > Adding support to call bpf_get_attach_cookie helper from
> > > kprobe program attached by fprobe link.
> > >
> > > The bpf_cookie is provided by array of u64 values, where
> > > each value is paired with provided function address with
> > > the same array index.
> > >
> > > Suggested-by: Andrii Nakryiko <[email protected]>
> > > Signed-off-by: Jiri Olsa <[email protected]>
> > > ---
> > > include/linux/bpf.h | 2 +
> > > include/uapi/linux/bpf.h | 1 +
> > > kernel/bpf/syscall.c | 83 +++++++++++++++++++++++++++++++++-
> > > kernel/trace/bpf_trace.c | 16 ++++++-
> > > tools/include/uapi/linux/bpf.h | 1 +
> > > 5 files changed, 100 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > > index 6eb0b180d33b..7b65f05c0487 100644
> > > --- a/include/linux/bpf.h
> > > +++ b/include/linux/bpf.h
> > > @@ -1301,6 +1301,8 @@ static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
> > > #endif
> > > }
> > >
> > > +u64 bpf_fprobe_cookie(struct bpf_run_ctx *ctx, u64 ip);
> > > +
> > > /* BPF program asks to bypass CAP_NET_BIND_SERVICE in bind. */
> > > #define BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE (1 << 0)
> > > /* BPF program asks to set CN on the packet. */
> > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > > index c0912f0a3dfe..0dc6aa4f9683 100644
> > > --- a/include/uapi/linux/bpf.h
> > > +++ b/include/uapi/linux/bpf.h
> > > @@ -1484,6 +1484,7 @@ union bpf_attr {
> > > __aligned_u64 addrs;
> > > __u32 cnt;
> > > __u32 flags;
> > > + __aligned_u64 bpf_cookies;
> >
> > maybe put it right after addrs, they are closely related and cnt
> > describes all of syms/addrs/cookies.
>
> ok
>
> >
> > > } fprobe;
> > > };
> > > } link_create;
> > > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> > > index 0cfbb112c8e1..6c5e74bc43b6 100644
> > > --- a/kernel/bpf/syscall.c
> > > +++ b/kernel/bpf/syscall.c
> > > @@ -33,6 +33,8 @@
> > > #include <linux/rcupdate_trace.h>
> > > #include <linux/memcontrol.h>
> > > #include <linux/fprobe.h>
> > > +#include <linux/bsearch.h>
> > > +#include <linux/sort.h>
> > >
> > > #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
> > > (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
> > > @@ -3025,10 +3027,18 @@ static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *pro
> > >
> > > #ifdef CONFIG_FPROBE
> > >
> > > +struct bpf_fprobe_cookie {
> > > + unsigned long addr;
> > > + u64 bpf_cookie;
> > > +};
> > > +
> > > struct bpf_fprobe_link {
> > > struct bpf_link link;
> > > struct fprobe fp;
> > > unsigned long *addrs;
> > > + struct bpf_run_ctx run_ctx;
> > > + struct bpf_fprobe_cookie *bpf_cookies;
> >
> > you already have all the addrs above, why keeping a second copy of
> > each addrs in bpf_fprobe_cookie. Let's have two arrays: addrs
> > (unsigned long) and cookies (u64) and make sure that they are sorted
> > together. Then lookup addrs, calculate index, use that index to fetch
> > cookie.
> >
> > Seems like sort_r() provides exactly the interface you'd need to do
> > this very easily. Having addrs separate from cookies also a bit
> > advantageous in terms of TLB misses (if you need any more persuasion
> > ;)
>
> no persuation needed, I actually tried that but it turned out sort_r
> is not ready yet ;-)
>
> because you can't pass priv pointer to the swap callback, so we can't
> swap the other array.. I did a change to allow that, but it's not trivial
> and will need some bigger testing/review because the original sort
> calls sort_r, and of course there are many 'sort' users ;-)
Big sigh... :( Did you do something similar to _CMP_WRAPPER? You don't
need to change the interface of sort(), so it shouldn't require
extensive code refactoring. You'll just need to adjust priv to be not
just cmp_func, but cmp_func + swap_fun (need a small struct on the
stack in sort, probably). Or you did something else?
>
> >
> > > + u32 cnt;
> > > };
> > >
> > > static void bpf_fprobe_link_release(struct bpf_link *link)
> > > @@ -3045,6 +3055,7 @@ static void bpf_fprobe_link_dealloc(struct bpf_link *link)
> > >
> > > fprobe_link = container_of(link, struct bpf_fprobe_link, link);
> > > kfree(fprobe_link->addrs);
> > > + kfree(fprobe_link->bpf_cookies);
> > > kfree(fprobe_link);
> > > }
> > >
> > > @@ -3053,9 +3064,37 @@ static const struct bpf_link_ops bpf_fprobe_link_lops = {
> > > .dealloc = bpf_fprobe_link_dealloc,
> > > };
> > >
> > > +static int bpf_fprobe_cookie_cmp(const void *_a, const void *_b)
> > > +{
> > > + const struct bpf_fprobe_cookie *a = _a;
> > > + const struct bpf_fprobe_cookie *b = _b;
> > > +
> > > + if (a->addr == b->addr)
> > > + return 0;
> > > + return a->addr < b->addr ? -1 : 1;
> > > +}
> > > +
> > > +u64 bpf_fprobe_cookie(struct bpf_run_ctx *ctx, u64 ip)
> > > +{
> > > + struct bpf_fprobe_link *fprobe_link;
> > > + struct bpf_fprobe_cookie *val, key = {
> > > + .addr = (unsigned long) ip,
> > > + };
> > > +
> > > + if (!ctx)
> > > + return 0;
> >
> > is it allowed to have ctx == NULL?
>
> nope, I was also thinking this is more 'WARN_ON[_ONCE]' check
>
> >
> > > + fprobe_link = container_of(ctx, struct bpf_fprobe_link, run_ctx);
> > > + if (!fprobe_link->bpf_cookies)
> > > + return 0;
> > > + val = bsearch(&key, fprobe_link->bpf_cookies, fprobe_link->cnt,
> > > + sizeof(key), bpf_fprobe_cookie_cmp);
> > > + return val ? val->bpf_cookie : 0;
> > > +}
> > > +
> > > static int fprobe_link_prog_run(struct bpf_fprobe_link *fprobe_link,
> > > struct pt_regs *regs)
> > > {
> > > + struct bpf_run_ctx *old_run_ctx;
> > > int err;
> > >
> > > if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
> > > @@ -3063,12 +3102,16 @@ static int fprobe_link_prog_run(struct bpf_fprobe_link *fprobe_link,
> > > goto out;
> > > }
> > >
> > > + old_run_ctx = bpf_set_run_ctx(&fprobe_link->run_ctx);
> > > +
> > > rcu_read_lock();
> > > migrate_disable();
> > > err = bpf_prog_run(fprobe_link->link.prog, regs);
> > > migrate_enable();
> > > rcu_read_unlock();
> > >
> > > + bpf_reset_run_ctx(old_run_ctx);
> > > +
> > > out:
> > > __this_cpu_dec(bpf_prog_active);
> > > return err;
> > > @@ -3161,10 +3204,12 @@ static int fprobe_resolve_syms(const void *usyms, u32 cnt,
> > >
> > > static int bpf_fprobe_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
> > > {
> > > + struct bpf_fprobe_cookie *bpf_cookies = NULL;
> > > struct bpf_fprobe_link *link = NULL;
> > > struct bpf_link_primer link_primer;
> > > + void __user *ubpf_cookies;
> > > + u32 flags, cnt, i, size;
> > > unsigned long *addrs;
> > > - u32 flags, cnt, size;
> > > void __user *uaddrs;
> > > void __user *usyms;
> > > int err;
> > > @@ -3205,6 +3250,37 @@ static int bpf_fprobe_link_attach(const union bpf_attr *attr, struct bpf_prog *p
> > > goto error;
> > > }
> > >
> > > + ubpf_cookies = u64_to_user_ptr(attr->link_create.fprobe.bpf_cookies);
> >
> > nit: let's call all this "cookies", this bpf_ prefix feels a bit
> > redundant (I know about perf_event.bpf_cookie, but still).
>
> ok
>
> >
> > > + if (ubpf_cookies) {
> > > + u64 *tmp;
> > > +
> > > + err = -ENOMEM;
> > > + tmp = kzalloc(size, GFP_KERNEL);
> >
> > kvmalloc?
>
> ok
>
> thanks,
> jirka
>