It's useful to know [uk]probe's nmissed and nhit stats. For example with
tracing tools, it's important to know when events may have been lost.
debugfs currently exposes a control file to get this information, but
it is not compatible with probes registered with the perf API.
While bpf programs may be able to manually count nhit, there is no way
to gather nmissed. In other words, it is currently not possible to
retrieve information about FD-based probes.
This patch adds a new ioctl that lets users query nmissed (as well as
nhit for completeness). We currently only add support for [uk]probes
but leave the possibility open for other probes like tracepoint.
v1 -> v2:
- More descriptive cover letter
- Make API more generic and support uprobes as well
- Use casters/getters for libbpf instead of single getter
- Fix typos
- Remove size field from ioctl struct
- Split out libbpf.h sync to tools dir to separate commit
Daniel Xu (4):
tracing/probe: Add PERF_EVENT_IOC_QUERY_PROBE ioctl
libbpf: Add helpers to extract perf fd from bpf_link
tracing/probe: Sync perf_event.h to tools
tracing/probe: Add self test for PERF_EVENT_IOC_QUERY_PROBE
include/linux/trace_events.h | 12 +++
include/uapi/linux/perf_event.h | 19 ++++
kernel/events/core.c | 20 ++++
kernel/trace/trace_kprobe.c | 23 ++++
kernel/trace/trace_uprobe.c | 23 ++++
tools/include/uapi/linux/perf_event.h | 19 ++++
tools/lib/bpf/libbpf.c | 19 ++++
tools/lib/bpf/libbpf.h | 8 ++
tools/lib/bpf/libbpf.map | 6 ++
.../selftests/bpf/prog_tests/attach_probe.c | 102 ++++++++++++++++++
10 files changed, 251 insertions(+)
--
2.20.1
It's useful to know [uk]probe's nmissed and nhit stats. For example with
tracing tools, it's important to know when events may have been lost.
debugfs currently exposes a control file to get this information, but
it is not compatible with probes registered with the perf API.
While bpf programs may be able to manually count nhit, there is no way
to gather nmissed. In other words, it is currently not possible to
retrieve information about FD-based probes.
This patch adds a new ioctl that lets users query nmissed (as well as
nhit for completeness). We currently only add support for [uk]probes
but leave the possibility open for other probes like tracepoint.
Signed-off-by: Daniel Xu <[email protected]>
---
include/linux/trace_events.h | 12 ++++++++++++
include/uapi/linux/perf_event.h | 19 +++++++++++++++++++
kernel/events/core.c | 20 ++++++++++++++++++++
kernel/trace/trace_kprobe.c | 23 +++++++++++++++++++++++
kernel/trace/trace_uprobe.c | 23 +++++++++++++++++++++++
5 files changed, 97 insertions(+)
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 5150436783e8..61558f19696a 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -586,6 +586,12 @@ extern int bpf_get_kprobe_info(const struct perf_event *event,
u32 *fd_type, const char **symbol,
u64 *probe_offset, u64 *probe_addr,
bool perf_type_tracepoint);
+extern int perf_kprobe_event_query(struct perf_event *event, void __user *info);
+#else
+int perf_kprobe_event_query(struct perf_event *event, void __user *info)
+{
+ return -EOPNOTSUPP;
+}
#endif
#ifdef CONFIG_UPROBE_EVENTS
extern int perf_uprobe_init(struct perf_event *event,
@@ -594,6 +600,12 @@ extern void perf_uprobe_destroy(struct perf_event *event);
extern int bpf_get_uprobe_info(const struct perf_event *event,
u32 *fd_type, const char **filename,
u64 *probe_offset, bool perf_type_tracepoint);
+extern int perf_uprobe_event_query(struct perf_event *event, void __user *info);
+#else
+int perf_uprobe_event_query(struct perf_event *event, void __user *info)
+{
+ return -EOPNOTSUPP;
+}
#endif
extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
char *filter_str);
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 7198ddd0c6b1..65faa9b2a3b4 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -447,6 +447,24 @@ struct perf_event_query_bpf {
__u32 ids[0];
};
+/*
+ * Structure used by below PERF_EVENT_IOC_QUERY_PROBE command
+ * to query information about the probe attached to the perf
+ * event. Currently only supports [uk]probes.
+ */
+struct perf_event_query_probe {
+ /*
+ * Set by the kernel to indicate number of times this probe
+ * was temporarily disabled
+ */
+ __u64 nmissed;
+ /*
+ * Set by the kernel to indicate number of times this probe
+ * was hit
+ */
+ __u64 nhit;
+};
+
/*
* Ioctls that can be done on a perf event fd:
*/
@@ -462,6 +480,7 @@ struct perf_event_query_bpf {
#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32)
#define PERF_EVENT_IOC_QUERY_BPF _IOWR('$', 10, struct perf_event_query_bpf *)
#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW('$', 11, struct perf_event_attr *)
+#define PERF_EVENT_IOC_QUERY_PROBE _IOR('$', 12, struct perf_event_query_probe *)
enum perf_event_ioc_flags {
PERF_IOC_FLAG_GROUP = 1U << 0,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 026a14541a38..3e0fe6eaaad0 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5060,6 +5060,8 @@ static int perf_event_set_filter(struct perf_event *event, void __user *arg);
static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd);
static int perf_copy_attr(struct perf_event_attr __user *uattr,
struct perf_event_attr *attr);
+static int perf_probe_event_query(struct perf_event *event,
+ void __user *info);
static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg)
{
@@ -5143,6 +5145,10 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon
return perf_event_modify_attr(event, &new_attr);
}
+#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
+ case PERF_EVENT_IOC_QUERY_PROBE:
+ return perf_probe_event_query(event, (void __user *)arg);
+#endif
default:
return -ENOTTY;
}
@@ -8833,6 +8839,20 @@ static inline void perf_tp_register(void)
#endif
}
+static int perf_probe_event_query(struct perf_event *event,
+ void __user *info)
+{
+#ifdef CONFIG_KPROBE_EVENTS
+ if (event->attr.type == perf_kprobe.type)
+ return perf_kprobe_event_query(event, (void __user *)info);
+#endif
+#ifdef CONFIG_UPROBE_EVENTS
+ if (event->attr.type == perf_uprobe.type)
+ return perf_uprobe_event_query(event, (void __user *)info);
+#endif
+ return -EINVAL;
+}
+
static void perf_event_free_filter(struct perf_event *event)
{
ftrace_profile_free_filter(event);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 9d483ad9bb6c..a734c2d506be 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -196,6 +196,29 @@ bool trace_kprobe_error_injectable(struct trace_event_call *call)
return within_error_injection_list(trace_kprobe_address(tk));
}
+int perf_kprobe_event_query(struct perf_event *event, void __user *info)
+{
+ struct perf_event_query_probe __user *uquery = info;
+ struct perf_event_query_probe query = {};
+ struct trace_event_call *call = event->tp_event;
+ struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
+ u64 nmissed, nhit;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (copy_from_user(&query, uquery, sizeof(query)))
+ return -EFAULT;
+
+ nhit = trace_kprobe_nhit(tk);
+ nmissed = tk->rp.kp.nmissed;
+
+ if (put_user(nmissed, &uquery->nmissed) ||
+ put_user(nhit, &uquery->nhit))
+ return -EFAULT;
+
+ return 0;
+}
+
static int register_kprobe_event(struct trace_kprobe *tk);
static int unregister_kprobe_event(struct trace_kprobe *tk);
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 1ceedb9146b1..5f50386ada59 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -1333,6 +1333,29 @@ static inline void init_trace_event_call(struct trace_uprobe *tu)
call->data = tu;
}
+int perf_uprobe_event_query(struct perf_event *event, void __user *info)
+{
+ struct perf_event_query_probe __user *uquery = info;
+ struct perf_event_query_probe query = {};
+ struct trace_event_call *call = event->tp_event;
+ struct trace_uprobe *tu = (struct trace_uprobe *)call->data;
+ u64 nmissed, nhit;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (copy_from_user(&query, uquery, sizeof(query)))
+ return -EFAULT;
+
+ nhit = tu->nhit;
+ nmissed = 0;
+
+ if (put_user(nmissed, &uquery->nmissed) ||
+ put_user(nhit, &uquery->nhit))
+ return -EFAULT;
+
+ return 0;
+}
+
static int register_uprobe_event(struct trace_uprobe *tu)
{
init_trace_event_call(tu);
--
2.20.1
It is sometimes necessary to perform ioctl's on the underlying perf fd.
There is not currently a way to extract the fd given a bpf_link, so add
a pair of casting and getting helpers.
The casting and getting helpers are nice because they let us define
broad categories of links that make it clear to users what they can
expect to extract from what type of link.
Signed-off-by: Daniel Xu <[email protected]>
---
tools/lib/bpf/libbpf.c | 19 +++++++++++++++++++
tools/lib/bpf/libbpf.h | 8 ++++++++
tools/lib/bpf/libbpf.map | 6 ++++++
3 files changed, 33 insertions(+)
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index ead915aec349..f4d750863abd 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -4004,6 +4004,25 @@ static int bpf_link__destroy_perf_event(struct bpf_link *link)
return err;
}
+const struct bpf_link_fd *bpf_link__as_fd(const struct bpf_link *link)
+{
+ if (!link)
+ return NULL;
+
+ if (link->destroy != &bpf_link__destroy_perf_event)
+ return NULL;
+
+ return (struct bpf_link_fd *)link;
+}
+
+int bpf_link_fd__fd(const struct bpf_link_fd *link)
+{
+ if (!link)
+ return -1;
+
+ return link->fd;
+}
+
struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
int pfd)
{
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 8a9d462a6f6d..4498b6ae459a 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -166,6 +166,14 @@ LIBBPF_API int bpf_program__unpin(struct bpf_program *prog, const char *path);
LIBBPF_API void bpf_program__unload(struct bpf_program *prog);
struct bpf_link;
+struct bpf_link_fd;
+
+/* casting APIs */
+LIBBPF_API const struct bpf_link_fd *
+bpf_link__as_fd(const struct bpf_link *link);
+
+/* getters APIs */
+LIBBPF_API int bpf_link_fd__fd(const struct bpf_link_fd *link);
LIBBPF_API int bpf_link__destroy(struct bpf_link *link);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index f9d316e873d8..b58dd0f0259c 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -184,3 +184,9 @@ LIBBPF_0.0.4 {
perf_buffer__new_raw;
perf_buffer__poll;
} LIBBPF_0.0.3;
+
+LIBBPF_0.0.5 {
+ global:
+ bpf_link__as_fd;
+ bpf_link_fd__fd;
+} LIBBPF_0.0.4;
--
2.20.1
Signed-off-by: Daniel Xu <[email protected]>
---
tools/include/uapi/linux/perf_event.h | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 7198ddd0c6b1..65faa9b2a3b4 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -447,6 +447,24 @@ struct perf_event_query_bpf {
__u32 ids[0];
};
+/*
+ * Structure used by below PERF_EVENT_IOC_QUERY_PROBE command
+ * to query information about the probe attached to the perf
+ * event. Currently only supports [uk]probes.
+ */
+struct perf_event_query_probe {
+ /*
+ * Set by the kernel to indicate number of times this probe
+ * was temporarily disabled
+ */
+ __u64 nmissed;
+ /*
+ * Set by the kernel to indicate number of times this probe
+ * was hit
+ */
+ __u64 nhit;
+};
+
/*
* Ioctls that can be done on a perf event fd:
*/
@@ -462,6 +480,7 @@ struct perf_event_query_bpf {
#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32)
#define PERF_EVENT_IOC_QUERY_BPF _IOWR('$', 10, struct perf_event_query_bpf *)
#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW('$', 11, struct perf_event_attr *)
+#define PERF_EVENT_IOC_QUERY_PROBE _IOR('$', 12, struct perf_event_query_probe *)
enum perf_event_ioc_flags {
PERF_IOC_FLAG_GROUP = 1U << 0,
--
2.20.1
Signed-off-by: Daniel Xu <[email protected]>
---
.../selftests/bpf/prog_tests/attach_probe.c | 102 ++++++++++++++++++
1 file changed, 102 insertions(+)
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index 5ecc267d98b0..bb53103ddb66 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -27,17 +27,27 @@ void test_attach_probe(void)
const char *kretprobe_name = "kretprobe/sys_nanosleep";
const char *uprobe_name = "uprobe/trigger_func";
const char *uretprobe_name = "uretprobe/trigger_func";
+ struct perf_event_query_probe kprobe_query = {};
+ struct perf_event_query_probe kretprobe_query = {};
+ struct perf_event_query_probe uprobe_query = {};
+ struct perf_event_query_probe uretprobe_query = {};
const int kprobe_idx = 0, kretprobe_idx = 1;
const int uprobe_idx = 2, uretprobe_idx = 3;
const char *file = "./test_attach_probe.o";
struct bpf_program *kprobe_prog, *kretprobe_prog;
struct bpf_program *uprobe_prog, *uretprobe_prog;
struct bpf_object *obj;
+ const struct bpf_link_fd *kprobe_fd_link;
+ const struct bpf_link_fd *kretprobe_fd_link;
+ const struct bpf_link_fd *uprobe_fd_link;
+ const struct bpf_link_fd *uretprobe_fd_link;
int err, prog_fd, duration = 0, res;
struct bpf_link *kprobe_link = NULL;
struct bpf_link *kretprobe_link = NULL;
struct bpf_link *uprobe_link = NULL;
struct bpf_link *uretprobe_link = NULL;
+ int kprobe_fd, kretprobe_fd;
+ int uprobe_fd, uretprobe_fd;
int results_map_fd;
size_t uprobe_offset;
ssize_t base_addr;
@@ -116,6 +126,52 @@ void test_attach_probe(void)
/* trigger & validate kprobe && kretprobe */
usleep(1);
+ kprobe_fd_link = bpf_link__as_fd(kprobe_link);
+ if (CHECK(!kprobe_fd_link, "kprobe_link_as_fd",
+ "failed to cast link to fd link\n"))
+ goto cleanup;
+
+ kprobe_fd = bpf_link_fd__fd(kprobe_fd_link);
+ if (CHECK(kprobe_fd < 0, "kprobe_get_perf_fd",
+ "failed to get perf fd from kprobe link\n"))
+ goto cleanup;
+
+ kretprobe_fd_link = bpf_link__as_fd(kretprobe_link);
+ if (CHECK(!kretprobe_fd_link, "kretprobe_link_as_fd",
+ "failed to cast link to fd link\n"))
+ goto cleanup;
+
+ kretprobe_fd = bpf_link_fd__fd(kretprobe_fd_link);
+ if (CHECK(kretprobe_fd < 0, "kretprobe_get_perf_fd",
+ "failed to get perf fd from kretprobe link\n"))
+ goto cleanup;
+
+ err = ioctl(kprobe_fd, PERF_EVENT_IOC_QUERY_PROBE, &kprobe_query);
+ if (CHECK(err, "get_kprobe_ioctl",
+ "failed to issue kprobe query ioctl\n"))
+ goto cleanup;
+ if (CHECK(kprobe_query.nmissed > 0, "get_kprobe_ioctl",
+ "read incorrect nmissed from kprobe_ioctl: %llu\n",
+ kprobe_query.nmissed))
+ goto cleanup;
+ if (CHECK(kprobe_query.nhit == 0, "get_kprobe_ioctl",
+ "read incorrect nhit from kprobe_ioctl: %llu\n",
+ kprobe_query.nhit))
+ goto cleanup;
+
+ err = ioctl(kretprobe_fd, PERF_EVENT_IOC_QUERY_PROBE, &kretprobe_query);
+ if (CHECK(err, "get_kretprobe_ioctl",
+ "failed to issue kretprobe query ioctl\n"))
+ goto cleanup;
+ if (CHECK(kretprobe_query.nmissed > 0, "get_kretprobe_ioctl",
+ "read incorrect nmissed from kretprobe_ioctl: %llu\n",
+ kretprobe_query.nmissed))
+ goto cleanup;
+ if (CHECK(kretprobe_query.nhit <= 0, "get_kretprobe_ioctl",
+ "read incorrect nhit from kretprobe_ioctl: %llu\n",
+ kretprobe_query.nhit))
+ goto cleanup;
+
err = bpf_map_lookup_elem(results_map_fd, &kprobe_idx, &res);
if (CHECK(err, "get_kprobe_res",
"failed to get kprobe res: %d\n", err))
@@ -135,6 +191,52 @@ void test_attach_probe(void)
/* trigger & validate uprobe & uretprobe */
get_base_addr();
+ uprobe_fd_link = bpf_link__as_fd(uprobe_link);
+ if (CHECK(!uprobe_fd_link, "uprobe_link_as_fd",
+ "failed to cast link to fd link\n"))
+ goto cleanup;
+
+ uprobe_fd = bpf_link_fd__fd(uprobe_fd_link);
+ if (CHECK(uprobe_fd < 0, "uprobe_get_perf_fd",
+ "failed to get perf fd from uprobe link\n"))
+ goto cleanup;
+
+ uretprobe_fd_link = bpf_link__as_fd(uretprobe_link);
+ if (CHECK(!uretprobe_fd_link, "uretprobe_link_as_fd",
+ "failed to cast link to fd link\n"))
+ goto cleanup;
+
+ uretprobe_fd = bpf_link_fd__fd(uretprobe_fd_link);
+ if (CHECK(uretprobe_fd < 0, "uretprobe_get_perf_fd",
+ "failed to get perf fd from uretprobe link\n"))
+ goto cleanup;
+
+ err = ioctl(uprobe_fd, PERF_EVENT_IOC_QUERY_PROBE, &uprobe_query);
+ if (CHECK(err, "get_uprobe_ioctl",
+ "failed to issue uprobe query ioctl\n"))
+ goto cleanup;
+ if (CHECK(uprobe_query.nmissed > 0, "get_uprobe_ioctl",
+ "read incorrect nmissed from uprobe_ioctl: %llu\n",
+ uprobe_query.nmissed))
+ goto cleanup;
+ if (CHECK(uprobe_query.nhit == 0, "get_uprobe_ioctl",
+ "read incorrect nhit from uprobe_ioctl: %llu\n",
+ uprobe_query.nhit))
+ goto cleanup;
+
+ err = ioctl(uretprobe_fd, PERF_EVENT_IOC_QUERY_PROBE, &uretprobe_query);
+ if (CHECK(err, "get_uretprobe_ioctl",
+ "failed to issue uretprobe query ioctl\n"))
+ goto cleanup;
+ if (CHECK(uretprobe_query.nmissed > 0, "get_uretprobe_ioctl",
+ "read incorrect nmissed from uretprobe_ioctl: %llu\n",
+ uretprobe_query.nmissed))
+ goto cleanup;
+ if (CHECK(uretprobe_query.nhit <= 0, "get_uretprobe_ioctl",
+ "read incorrect nhit from uretprobe_ioctl: %llu\n",
+ uretprobe_query.nhit))
+ goto cleanup;
+
err = bpf_map_lookup_elem(results_map_fd, &uprobe_idx, &res);
if (CHECK(err, "get_uprobe_res",
"failed to get uprobe res: %d\n", err))
--
2.20.1
On Fri, Aug 9, 2019, at 2:47 PM, Daniel Xu wrote:
> It's useful to know [uk]probe's nmissed and nhit stats. For example with
> tracing tools, it's important to know when events may have been lost.
> debugfs currently exposes a control file to get this information, but
> it is not compatible with probes registered with the perf API.
>
> While bpf programs may be able to manually count nhit, there is no way
> to gather nmissed. In other words, it is currently not possible to
> retrieve information about FD-based probes.
>
> This patch adds a new ioctl that lets users query nmissed (as well as
> nhit for completeness). We currently only add support for [uk]probes
> but leave the possibility open for other probes like tracepoint.
>
> v1 -> v2:
> - More descriptive cover letter
> - Make API more generic and support uprobes as well
> - Use casters/getters for libbpf instead of single getter
> - Fix typos
> - Remove size field from ioctl struct
> - Split out libbpf.h sync to tools dir to separate commit
>
> Daniel Xu (4):
> tracing/probe: Add PERF_EVENT_IOC_QUERY_PROBE ioctl
> libbpf: Add helpers to extract perf fd from bpf_link
> tracing/probe: Sync perf_event.h to tools
> tracing/probe: Add self test for PERF_EVENT_IOC_QUERY_PROBE
>
> include/linux/trace_events.h | 12 +++
> include/uapi/linux/perf_event.h | 19 ++++
> kernel/events/core.c | 20 ++++
> kernel/trace/trace_kprobe.c | 23 ++++
> kernel/trace/trace_uprobe.c | 23 ++++
> tools/include/uapi/linux/perf_event.h | 19 ++++
> tools/lib/bpf/libbpf.c | 19 ++++
> tools/lib/bpf/libbpf.h | 8 ++
> tools/lib/bpf/libbpf.map | 6 ++
> .../selftests/bpf/prog_tests/attach_probe.c | 102 ++++++++++++++++++
> 10 files changed, 251 insertions(+)
>
> --
> 2.20.1
>
>
CC PeterZ, whose email I misspelled. Apologies.
Daniel
On Fri, Aug 9, 2019 at 2:47 PM Daniel Xu <[email protected]> wrote:
>
> It's useful to know [uk]probe's nmissed and nhit stats. For example with
> tracing tools, it's important to know when events may have been lost.
> debugfs currently exposes a control file to get this information, but
> it is not compatible with probes registered with the perf API.
>
> While bpf programs may be able to manually count nhit, there is no way
> to gather nmissed. In other words, it is currently not possible to
> retrieve information about FD-based probes.
>
> This patch adds a new ioctl that lets users query nmissed (as well as
> nhit for completeness). We currently only add support for [uk]probes
> but leave the possibility open for other probes like tracepoint.
>
> Signed-off-by: Daniel Xu <[email protected]>
> ---
> include/linux/trace_events.h | 12 ++++++++++++
> include/uapi/linux/perf_event.h | 19 +++++++++++++++++++
> kernel/events/core.c | 20 ++++++++++++++++++++
> kernel/trace/trace_kprobe.c | 23 +++++++++++++++++++++++
> kernel/trace/trace_uprobe.c | 23 +++++++++++++++++++++++
> 5 files changed, 97 insertions(+)
>
> diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
> index 5150436783e8..61558f19696a 100644
> --- a/include/linux/trace_events.h
> +++ b/include/linux/trace_events.h
> @@ -586,6 +586,12 @@ extern int bpf_get_kprobe_info(const struct perf_event *event,
> u32 *fd_type, const char **symbol,
> u64 *probe_offset, u64 *probe_addr,
> bool perf_type_tracepoint);
> +extern int perf_kprobe_event_query(struct perf_event *event, void __user *info);
> +#else
> +int perf_kprobe_event_query(struct perf_event *event, void __user *info)
> +{
> + return -EOPNOTSUPP;
> +}
> #endif
> #ifdef CONFIG_UPROBE_EVENTS
> extern int perf_uprobe_init(struct perf_event *event,
> @@ -594,6 +600,12 @@ extern void perf_uprobe_destroy(struct perf_event *event);
> extern int bpf_get_uprobe_info(const struct perf_event *event,
> u32 *fd_type, const char **filename,
> u64 *probe_offset, bool perf_type_tracepoint);
> +extern int perf_uprobe_event_query(struct perf_event *event, void __user *info);
> +#else
> +int perf_uprobe_event_query(struct perf_event *event, void __user *info)
> +{
> + return -EOPNOTSUPP;
> +}
> #endif
> extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
> char *filter_str);
> diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
> index 7198ddd0c6b1..65faa9b2a3b4 100644
> --- a/include/uapi/linux/perf_event.h
> +++ b/include/uapi/linux/perf_event.h
> @@ -447,6 +447,24 @@ struct perf_event_query_bpf {
> __u32 ids[0];
> };
>
> +/*
> + * Structure used by below PERF_EVENT_IOC_QUERY_PROBE command
> + * to query information about the probe attached to the perf
> + * event. Currently only supports [uk]probes.
> + */
> +struct perf_event_query_probe {
> + /*
> + * Set by the kernel to indicate number of times this probe
> + * was temporarily disabled
> + */
> + __u64 nmissed;
> + /*
> + * Set by the kernel to indicate number of times this probe
> + * was hit
> + */
> + __u64 nhit;
> +};
> +
> /*
> * Ioctls that can be done on a perf event fd:
> */
> @@ -462,6 +480,7 @@ struct perf_event_query_bpf {
> #define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32)
> #define PERF_EVENT_IOC_QUERY_BPF _IOWR('$', 10, struct perf_event_query_bpf *)
> #define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW('$', 11, struct perf_event_attr *)
> +#define PERF_EVENT_IOC_QUERY_PROBE _IOR('$', 12, struct perf_event_query_probe *)
>
> enum perf_event_ioc_flags {
> PERF_IOC_FLAG_GROUP = 1U << 0,
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 026a14541a38..3e0fe6eaaad0 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -5060,6 +5060,8 @@ static int perf_event_set_filter(struct perf_event *event, void __user *arg);
> static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd);
> static int perf_copy_attr(struct perf_event_attr __user *uattr,
> struct perf_event_attr *attr);
> +static int perf_probe_event_query(struct perf_event *event,
> + void __user *info);
>
> static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg)
> {
> @@ -5143,6 +5145,10 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon
>
> return perf_event_modify_attr(event, &new_attr);
> }
> +#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
> + case PERF_EVENT_IOC_QUERY_PROBE:
> + return perf_probe_event_query(event, (void __user *)arg);
> +#endif
> default:
> return -ENOTTY;
> }
> @@ -8833,6 +8839,20 @@ static inline void perf_tp_register(void)
> #endif
> }
>
> +static int perf_probe_event_query(struct perf_event *event,
> + void __user *info)
> +{
> +#ifdef CONFIG_KPROBE_EVENTS
> + if (event->attr.type == perf_kprobe.type)
> + return perf_kprobe_event_query(event, (void __user *)info);
> +#endif
> +#ifdef CONFIG_UPROBE_EVENTS
> + if (event->attr.type == perf_uprobe.type)
> + return perf_uprobe_event_query(event, (void __user *)info);
> +#endif
> + return -EINVAL;
> +}
> +
> static void perf_event_free_filter(struct perf_event *event)
> {
> ftrace_profile_free_filter(event);
> diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
> index 9d483ad9bb6c..a734c2d506be 100644
> --- a/kernel/trace/trace_kprobe.c
> +++ b/kernel/trace/trace_kprobe.c
> @@ -196,6 +196,29 @@ bool trace_kprobe_error_injectable(struct trace_event_call *call)
> return within_error_injection_list(trace_kprobe_address(tk));
> }
>
> +int perf_kprobe_event_query(struct perf_event *event, void __user *info)
> +{
> + struct perf_event_query_probe __user *uquery = info;
> + struct perf_event_query_probe query = {};
> + struct trace_event_call *call = event->tp_event;
> + struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
> + u64 nmissed, nhit;
> +
> + if (!capable(CAP_SYS_ADMIN))
> + return -EPERM;
> + if (copy_from_user(&query, uquery, sizeof(query)))
what about forward/backward compatibility? Didn't you have a size
field for perf_event_query_probe?
> + return -EFAULT;
> +
> + nhit = trace_kprobe_nhit(tk);
> + nmissed = tk->rp.kp.nmissed;
> +
> + if (put_user(nmissed, &uquery->nmissed) ||
> + put_user(nhit, &uquery->nhit))
Wouldn't it be nicer to just do one user put for entire struct (or at
least relevant part of it with backward/forward compatibility?).
> + return -EFAULT;
> +
> + return 0;
> +}
> +
> static int register_kprobe_event(struct trace_kprobe *tk);
> static int unregister_kprobe_event(struct trace_kprobe *tk);
>
> diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
> index 1ceedb9146b1..5f50386ada59 100644
> --- a/kernel/trace/trace_uprobe.c
> +++ b/kernel/trace/trace_uprobe.c
> @@ -1333,6 +1333,29 @@ static inline void init_trace_event_call(struct trace_uprobe *tu)
> call->data = tu;
> }
>
> +int perf_uprobe_event_query(struct perf_event *event, void __user *info)
> +{
> + struct perf_event_query_probe __user *uquery = info;
> + struct perf_event_query_probe query = {};
> + struct trace_event_call *call = event->tp_event;
> + struct trace_uprobe *tu = (struct trace_uprobe *)call->data;
> + u64 nmissed, nhit;
> +
> + if (!capable(CAP_SYS_ADMIN))
> + return -EPERM;
> + if (copy_from_user(&query, uquery, sizeof(query)))
> + return -EFAULT;
> +
> + nhit = tu->nhit;
> + nmissed = 0;
> +
> + if (put_user(nmissed, &uquery->nmissed) ||
> + put_user(nhit, &uquery->nhit))
> + return -EFAULT;
same questions as above
> +
> + return 0;
> +}
> +
> static int register_uprobe_event(struct trace_uprobe *tu)
> {
> init_trace_event_call(tu);
> --
> 2.20.1
>
On Fri, Aug 9, 2019 at 2:48 PM Daniel Xu <[email protected]> wrote:
>
> It is sometimes necessary to perform ioctl's on the underlying perf fd.
> There is not currently a way to extract the fd given a bpf_link, so add a
> a pair of casting and getting helpers.
>
> The casting and getting helpers are nice because they let us define
> broad categories of links that makes it clear to users what they can
> expect to extract from what type of link.
>
> Signed-off-by: Daniel Xu <[email protected]>
> ---
> tools/lib/bpf/libbpf.c | 19 +++++++++++++++++++
> tools/lib/bpf/libbpf.h | 8 ++++++++
> tools/lib/bpf/libbpf.map | 6 ++++++
> 3 files changed, 33 insertions(+)
>
> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> index ead915aec349..f4d750863abd 100644
> --- a/tools/lib/bpf/libbpf.c
> +++ b/tools/lib/bpf/libbpf.c
> @@ -4004,6 +4004,25 @@ static int bpf_link__destroy_perf_event(struct bpf_link *link)
> return err;
> }
>
> +const struct bpf_link_fd *bpf_link__as_fd(const struct bpf_link *link)
> +{
> + if (!link)
> + return NULL;
I'm not sure about all those NULL checks everywhere. It's not really a
Java, passing NULL is ok only to xxx__free() kind of functions,
everything else shouldn't expect to deal with NULLs correctly, IMO.
> +
> + if (link->destroy != &bpf_link__destroy_perf_event)
While this is clever, it doesn't handle case for raw tracepoint
already. It is also anti-future proof, as when we add another use case
for bpf_link__fd with different destroy callback, we'll most probably
forget to update this function.
So let's instead introduce enum bpf_link_type and make it standard
part of "abstract" bpf_link. Please also add getter
enum bpf_link_type bpf_link__type(const struct bpf_link *link)
{
return link->type;
}
to fetch it. That way users will also be able to write generic
functions potentially handling multiple kinds of bpf_links (and there
won't be a need to just "guess" the type of bpf_link).
> + return NULL;
> +
> + return (struct bpf_link_fd *)link;
> +}
> +
> +int bpf_link_fd__fd(const struct bpf_link_fd *link)
> +{
> + if (!link)
> + return -1;
> +
> + return link->fd;
> +}
> +
> struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
> int pfd)
> {
> diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
> index 8a9d462a6f6d..4498b6ae459a 100644
> --- a/tools/lib/bpf/libbpf.h
> +++ b/tools/lib/bpf/libbpf.h
> @@ -166,6 +166,14 @@ LIBBPF_API int bpf_program__unpin(struct bpf_program *prog, const char *path);
> LIBBPF_API void bpf_program__unload(struct bpf_program *prog);
>
> struct bpf_link;
> +struct bpf_link_fd;
> +
> +/* casting APIs */
> +LIBBPF_API const struct bpf_link_fd *
> +bpf_link__as_fd(const struct bpf_link *link);
> +
> +/* getters APIs */
> +LIBBPF_API int bpf_link_fd__fd(const struct bpf_link_fd *link);
But otherwise these new APIs look great, thanks!
>
> LIBBPF_API int bpf_link__destroy(struct bpf_link *link);
>
> diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
> index f9d316e873d8..b58dd0f0259c 100644
> --- a/tools/lib/bpf/libbpf.map
> +++ b/tools/lib/bpf/libbpf.map
> @@ -184,3 +184,9 @@ LIBBPF_0.0.4 {
> perf_buffer__new_raw;
> perf_buffer__poll;
> } LIBBPF_0.0.3;
> +
> +LIBBPF_0.0.5 {
> + global:
> + bpf_link__as_fd;
> + bpf_link_fd__fd;
> +} LIBBPF_0.0.4;
> --
> 2.20.1
>
On Fri, Aug 9, 2019 at 2:49 PM Daniel Xu <[email protected]> wrote:
>
> Signed-off-by: Daniel Xu <[email protected]>
> ---
Getting verbose, we might want to rethink this test checks at some
point, but I think it's fine for now.
Please fix typos below, but overall:
Acked-by: Andrii Nakryiko <[email protected]>
> .../selftests/bpf/prog_tests/attach_probe.c | 102 ++++++++++++++++++
> 1 file changed, 102 insertions(+)
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
> index 5ecc267d98b0..bb53103ddb66 100644
> --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
> +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
> @@ -27,17 +27,27 @@ void test_attach_probe(void)
> const char *kretprobe_name = "kretprobe/sys_nanosleep";
> const char *uprobe_name = "uprobe/trigger_func";
> const char *uretprobe_name = "uretprobe/trigger_func";
> + struct perf_event_query_probe kprobe_query = {};
> + struct perf_event_query_probe kretprobe_query = {};
> + struct perf_event_query_probe uprobe_query = {};
> + struct perf_event_query_probe uretprobe_query = {};
> const int kprobe_idx = 0, kretprobe_idx = 1;
> const int uprobe_idx = 2, uretprobe_idx = 3;
> const char *file = "./test_attach_probe.o";
> struct bpf_program *kprobe_prog, *kretprobe_prog;
> struct bpf_program *uprobe_prog, *uretprobe_prog;
> struct bpf_object *obj;
> + const struct bpf_link_fd *kprobe_fd_link;
> + const struct bpf_link_fd *kretprobe_fd_link;
> + const struct bpf_link_fd *uprobe_fd_link;
> + const struct bpf_link_fd *uretprobe_fd_link;
> int err, prog_fd, duration = 0, res;
> struct bpf_link *kprobe_link = NULL;
> struct bpf_link *kretprobe_link = NULL;
> struct bpf_link *uprobe_link = NULL;
> struct bpf_link *uretprobe_link = NULL;
> + int kprobe_fd, kretprobe_fd;
> + int uprobe_fd, uretprobe_fd;
> int results_map_fd;
> size_t uprobe_offset;
> ssize_t base_addr;
> @@ -116,6 +126,52 @@ void test_attach_probe(void)
> /* trigger & validate kprobe && kretprobe */
> usleep(1);
>
> + kprobe_fd_link = bpf_link__as_fd(kprobe_link);
> + if (CHECK(!kprobe_fd_link, "kprobe_link_as_fd",
> + "failed to cast link to fd link\n"))
> + goto cleanup;
> +
> + kprobe_fd = bpf_link_fd__fd(kprobe_fd_link);
> + if (CHECK(kprobe_fd < 0, "kprobe_get_perf_fd",
> + "failed to get perf fd from kprobe link\n"))
> + goto cleanup;
> +
> + kretprobe_fd_link = bpf_link__as_fd(kretprobe_link);
> + if (CHECK(!kretprobe_fd_link, "kretprobe_link_as_fd",
> + "failed to cast link to fd link\n"))
> + goto cleanup;
> +
> + kretprobe_fd = bpf_link_fd__fd(kretprobe_fd_link);
> + if (CHECK(kretprobe_fd < 0, "kretprobe_get_perf_fd",
> + "failed to get perf fd from kretprobe link\n"))
> + goto cleanup;
> +
> + err = ioctl(kprobe_fd, PERF_EVENT_IOC_QUERY_PROBE, &kprobe_query);
> + if (CHECK(err, "get_kprobe_ioctl",
> + "failed to issue kprobe query ioctl\n"))
> + goto cleanup;
> + if (CHECK(kprobe_query.nmissed > 0, "get_kprobe_ioctl",
> + "read incorect nmissed from kprobe_ioctl: %llu\n",
typo: incorrect
> + kprobe_query.nmissed))
> + goto cleanup;
> + if (CHECK(kprobe_query.nhit == 0, "get_kprobe_ioctl",
> + "read incorect nhit from kprobe_ioctl: %llu\n",
typo: incorrect
> + kprobe_query.nhit))
> + goto cleanup;
> +
> + err = ioctl(kretprobe_fd, PERF_EVENT_IOC_QUERY_PROBE, &kretprobe_query);
> + if (CHECK(err, "get_kretprobe_ioctl",
> + "failed to issue kretprobe query ioctl\n"))
> + goto cleanup;
> + if (CHECK(kretprobe_query.nmissed > 0, "get_kretprobe_ioctl",
> + "read incorect nmissed from kretprobe_ioctl: %llu\n",
same typo :)
> + kretprobe_query.nmissed))
> + goto cleanup;
> + if (CHECK(kretprobe_query.nhit <= 0, "get_kretprobe_ioctl",
> + "read incorect nhit from kretprobe_ioctl: %llu\n",
the power of copy/paste! :)
> + kretprobe_query.nhit))
> + goto cleanup;
> +
> err = bpf_map_lookup_elem(results_map_fd, &kprobe_idx, &res);
> if (CHECK(err, "get_kprobe_res",
> "failed to get kprobe res: %d\n", err))
> @@ -135,6 +191,52 @@ void test_attach_probe(void)
> /* trigger & validate uprobe & uretprobe */
> get_base_addr();
>
> + uprobe_fd_link = bpf_link__as_fd(uprobe_link);
> + if (CHECK(!uprobe_fd_link, "uprobe_link_as_fd",
> + "failed to cast link to fd link\n"))
> + goto cleanup;
> +
> + uprobe_fd = bpf_link_fd__fd(uprobe_fd_link);
> + if (CHECK(uprobe_fd < 0, "uprobe_get_perf_fd",
> + "failed to get perf fd from uprobe link\n"))
> + goto cleanup;
> +
> + uretprobe_fd_link = bpf_link__as_fd(uretprobe_link);
> + if (CHECK(!uretprobe_fd_link, "uretprobe_link_as_fd",
> + "failed to cast link to fd link\n"))
> + goto cleanup;
> +
> + uretprobe_fd = bpf_link_fd__fd(uretprobe_fd_link);
> + if (CHECK(uretprobe_fd < 0, "uretprobe_get_perf_fd",
> + "failed to get perf fd from uretprobe link\n"))
> + goto cleanup;
> +
> + err = ioctl(uprobe_fd, PERF_EVENT_IOC_QUERY_PROBE, &uprobe_query);
> + if (CHECK(err, "get_uprobe_ioctl",
> + "failed to issue uprobe query ioctl\n"))
> + goto cleanup;
> + if (CHECK(uprobe_query.nmissed > 0, "get_uprobe_ioctl",
> + "read incorect nmissed from uprobe_ioctl: %llu\n",
> + uprobe_query.nmissed))
> + goto cleanup;
> + if (CHECK(uprobe_query.nhit == 0, "get_uprobe_ioctl",
> + "read incorect nhit from uprobe_ioctl: %llu\n",
> + uprobe_query.nhit))
> + goto cleanup;
> +
> + err = ioctl(uretprobe_fd, PERF_EVENT_IOC_QUERY_PROBE, &uretprobe_query);
> + if (CHECK(err, "get_uretprobe_ioctl",
> + "failed to issue uretprobe query ioctl\n"))
> + goto cleanup;
> + if (CHECK(uretprobe_query.nmissed > 0, "get_uretprobe_ioctl",
> + "read incorect nmissed from uretprobe_ioctl: %llu\n",
> + uretprobe_query.nmissed))
> + goto cleanup;
> + if (CHECK(uretprobe_query.nhit <= 0, "get_uretprobe_ioctl",
> + "read incorect nhit from uretprobe_ioctl: %llu\n",
> + uretprobe_query.nhit))
> + goto cleanup;
> +
same typos
> err = bpf_map_lookup_elem(results_map_fd, &uprobe_idx, &res);
> if (CHECK(err, "get_uprobe_res",
> "failed to get uprobe res: %d\n", err))
> --
> 2.20.1
>
On Mon, Aug 12, 2019, at 8:57 AM, Andrii Nakryiko wrote:
> On Fri, Aug 9, 2019 at 2:47 PM Daniel Xu <[email protected]> wrote:
> >
> > It's useful to know [uk]probe's nmissed and nhit stats. For example with
> > tracing tools, it's important to know when events may have been lost.
> > debugfs currently exposes a control file to get this information, but
> > it is not compatible with probes registered with the perf API.
> >
> > While bpf programs may be able to manually count nhit, there is no way
> > to gather nmissed. In other words, it is currently not possible to
> > retrieve information about FD-based probes.
> >
> > This patch adds a new ioctl that lets users query nmissed (as well as
> > nhit for completeness). We currently only add support for [uk]probes
> > but leave the possibility open for other probes like tracepoint.
> >
> > Signed-off-by: Daniel Xu <[email protected]>
> > ---
> > include/linux/trace_events.h | 12 ++++++++++++
> > include/uapi/linux/perf_event.h | 19 +++++++++++++++++++
> > kernel/events/core.c | 20 ++++++++++++++++++++
> > kernel/trace/trace_kprobe.c | 23 +++++++++++++++++++++++
> > kernel/trace/trace_uprobe.c | 23 +++++++++++++++++++++++
> > 5 files changed, 97 insertions(+)
> >
[...]
> > + struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
> > + u64 nmissed, nhit;
> > +
> > + if (!capable(CAP_SYS_ADMIN))
> > + return -EPERM;
> > + if (copy_from_user(&query, uquery, sizeof(query)))
>
> what about forward/backward compatibility? Didn't you have a size
> field for perf_event_query_probe?
I initially did, yes. But after thinking about it more, I'm not convinced it
is necessary. It seems the last change to the debugfs counterpart was in
the initial commit cd7e7bd5e4, ~10 years ago. I cannot think of any other
information that would be useful off the top of my head, so I figured it'd
be best if we didn't make more complicated something that doesn't seem
likely to change. If we really needed something else, I figured adding
another ioctl is pretty cheap.
If you (or anyone) feels strongly about adding it back, I can make it a
u64 so there's no holes.
>
> > + return -EFAULT;
> > +
> > + nhit = trace_kprobe_nhit(tk);
> > + nmissed = tk->rp.kp.nmissed;
> > +
> > + if (put_user(nmissed, &uquery->nmissed) ||
> > + put_user(nhit, &uquery->nhit))
>
> Wouldn't it be nicer to just do one user put for entire struct (or at
> least relevant part of it with backward/forward compatibility?).
Not sure how that didn't occur to me. Thanks.
On Mon, Aug 12, 2019 at 5:39 PM Daniel Xu <[email protected]> wrote:
>
> On Mon, Aug 12, 2019, at 8:57 AM, Andrii Nakryiko wrote:
> > On Fri, Aug 9, 2019 at 2:47 PM Daniel Xu <[email protected]> wrote:
> > >
> > > It's useful to know [uk]probe's nmissed and nhit stats. For example with
> > > tracing tools, it's important to know when events may have been lost.
> > > debugfs currently exposes a control file to get this information, but
> > > it is not compatible with probes registered with the perf API.
> > >
> > > While bpf programs may be able to manually count nhit, there is no way
> > > to gather nmissed. In other words, it is currently not possible to
> > > retrieve information about FD-based probes.
> > >
> > > This patch adds a new ioctl that lets users query nmissed (as well as
> > > nhit for completeness). We currently only add support for [uk]probes
> > > but leave the possibility open for other probes like tracepoint.
> > >
> > > Signed-off-by: Daniel Xu <[email protected]>
> > > ---
> > > include/linux/trace_events.h | 12 ++++++++++++
> > > include/uapi/linux/perf_event.h | 19 +++++++++++++++++++
> > > kernel/events/core.c | 20 ++++++++++++++++++++
> > > kernel/trace/trace_kprobe.c | 23 +++++++++++++++++++++++
> > > kernel/trace/trace_uprobe.c | 23 +++++++++++++++++++++++
> > > 5 files changed, 97 insertions(+)
> > >
> [...]
> > > + struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
> > > + u64 nmissed, nhit;
> > > +
> > > + if (!capable(CAP_SYS_ADMIN))
> > > + return -EPERM;
> > > + if (copy_from_user(&query, uquery, sizeof(query)))
Not sure why we are reading that struct in, if we never use that? With
size as a first argument (see below about compatibility), I'd also read
just first 4 or 8 bytes only.
> >
> > what about forward/backward compatibility? Didn't you have a size
> > field for perf_event_query_probe?
>
> I initially did, yes. But after thinking about it more, I'm not convinced it
> is necessary. It seems the last change to the debugfs counterpart was in
> the initial commit cd7e7bd5e4, ~10 years ago. I cannot think of any other
> information that would be useful off the top of my head, so I figured it'd
> be best if we didn't make more complicated something that doesn't seem
> likely to change. If we really needed something else, I figured adding
> another ioctl is pretty cheap.
>
> If you (or anyone) feels strongly about adding it back, I can make it a
> u64 so there's no holes.
Debugfs is not stable API, so I guess that matters less. I think we
should support this forward/backward compatibility mechanism that
kernel implements for a lot of other stable APIs.
>
> >
> > > + return -EFAULT;
> > > +
> > > + nhit = trace_kprobe_nhit(tk);
> > > + nmissed = tk->rp.kp.nmissed;
> > > +
> > > + if (put_user(nmissed, &uquery->nmissed) ||
> > > + put_user(nhit, &uquery->nhit))
> >
> > Wouldn't it be nicer to just do one user put for entire struct (or at
> > least relevant part of it with backward/forward compatibility?).
>
> Not sure how that didn't occur to me. Thanks.
Once you add back size field for compatibility, doing it with one call
will make it easier to write only first N requested bytes.
On Fri, Aug 9, 2019 at 2:48 PM Daniel Xu <[email protected]> wrote:
>
> It's useful to know [uk]probe's nmissed and nhit stats. For example with
> tracing tools, it's important to know when events may have been lost.
> debugfs currently exposes a control file to get this information, but
> it is not compatible with probes registered with the perf API.
>
> While bpf programs may be able to manually count nhit, there is no way
> to gather nmissed. In other words, it is currently not possible to
> retrieve information about FD-based probes.
>
> This patch adds a new ioctl that lets users query nmissed (as well as
> nhit for completeness). We currently only add support for [uk]probes
> but leave the possibility open for other probes like tracepoint.
>
> Signed-off-by: Daniel Xu <[email protected]>
> ---
[...]
> +int perf_uprobe_event_query(struct perf_event *event, void __user *info)
> +{
> + struct perf_event_query_probe __user *uquery = info;
> + struct perf_event_query_probe query = {};
> + struct trace_event_call *call = event->tp_event;
> + struct trace_uprobe *tu = (struct trace_uprobe *)call->data;
> + u64 nmissed, nhit;
> +
> + if (!capable(CAP_SYS_ADMIN))
> + return -EPERM;
> + if (copy_from_user(&query, uquery, sizeof(query)))
> + return -EFAULT;
> +
> + nhit = tu->nhit;
> + nmissed = 0;
Blindly return 0 is a little weird. Maybe return 0xffffffffffffffff so
that the user
can tell this is not a valid 0. Or some other idea?
Thanks,
Song
On Tue, Aug 13, 2019, at 2:47 PM, Song Liu wrote:
> On Fri, Aug 9, 2019 at 2:48 PM Daniel Xu <[email protected]> wrote:
> >
> > It's useful to know [uk]probe's nmissed and nhit stats. For example with
> > tracing tools, it's important to know when events may have been lost.
> > debugfs currently exposes a control file to get this information, but
> > it is not compatible with probes registered with the perf API.
> >
> > While bpf programs may be able to manually count nhit, there is no way
> > to gather nmissed. In other words, it is currently not possible to
> > retrieve information about FD-based probes.
> >
> > This patch adds a new ioctl that lets users query nmissed (as well as
> > nhit for completeness). We currently only add support for [uk]probes
> > but leave the possibility open for other probes like tracepoint.
> >
> > Signed-off-by: Daniel Xu <[email protected]>
> > ---
> [...]
>
> > +int perf_uprobe_event_query(struct perf_event *event, void __user *info)
> > +{
> > + struct perf_event_query_probe __user *uquery = info;
> > + struct perf_event_query_probe query = {};
> > + struct trace_event_call *call = event->tp_event;
> > + struct trace_uprobe *tu = (struct trace_uprobe *)call->data;
> > + u64 nmissed, nhit;
> > +
> > + if (!capable(CAP_SYS_ADMIN))
> > + return -EPERM;
> > + if (copy_from_user(&query, uquery, sizeof(query)))
> > + return -EFAULT;
> > +
> > + nhit = tu->nhit;
> > + nmissed = 0;
>
> Blindly return 0 is a little weird. Maybe return 0xffffffffffffffff so
> that the user
> can tell this is not a valid 0. Or some other idea?
>
> Thanks,
> Song
>
My (maybe flawed) understanding is that uprobes cannot really miss the same way
a kprobe can. From skimming the code a little, it seems the main reason kprobes
can miss is when the processing of one kprobe results in hitting another kprobe.
The latter cannot be handled for whatever reason. The same cannot really happen
for uprobes as kernel doesn't call into userspace. That's why I made it 0 (that and
the fact I didn't see any accounting for uprobe misses).
cc Srikar who authored the uprobe patches.
Srikar, do you mind clarifying if uprobes can miss?
Thanks,
Daniel