Hello,
I've implemented a 'latency' subcommand in the perf ftrace command to
show a histogram of function latency.

To handle new subcommands, the existing functionality is moved to a
'trace' subcommand while preserving backward compatibility of not
having a subcommand at all (defaults to 'trace').

The latency subcommand accepts a target (kernel, for now) function
with the -T option and shows a histogram like below:
$ sudo ./perf ftrace latency -a -T mutex_lock sleep 1
#      DURATION  |      COUNT | GRAPH                                          |
     0 - 1    us |       2686 | ######################                         |
     1 - 2    us |        976 | ########                                       |
     2 - 4    us |        879 | #######                                        |
     4 - 8    us |        481 | ####                                           |
     8 - 16   us |        445 | ###                                            |
    16 - 32   us |          1 |                                                |
    32 - 64   us |          0 |                                                |
    64 - 128  us |          0 |                                                |
   128 - 256  us |          0 |                                                |
   256 - 512  us |          0 |                                                |
   512 - 1024 us |          0 |                                                |
     1 - 2    ms |          0 |                                                |
     2 - 4    ms |          0 |                                                |
     4 - 8    ms |          0 |                                                |
     8 - 16   ms |          0 |                                                |
    16 - 32   ms |          0 |                                                |
    32 - 64   ms |          0 |                                                |
    64 - 128  ms |          0 |                                                |
   128 - 256  ms |          0 |                                                |
   256 - 512  ms |          0 |                                                |
   512 - 1024 ms |          0 |                                                |
     1 - ...   s |          0 |                                                |
It basically uses the function_graph tracer to extract the duration of
the function. But with the -b/--use-bpf option, it can use BPF to save
the histogram in the kernel. For the same function, it gets:
$ sudo ./perf ftrace latency -a -b -T mutex_lock sleep 1
#      DURATION  |      COUNT | GRAPH                                          |
     0 - 1    us |       4682 | #############################################  |
     1 - 2    us |         11 |                                                |
     2 - 4    us |          0 |                                                |
     4 - 8    us |          0 |                                                |
     8 - 16   us |          7 |                                                |
    16 - 32   us |          6 |                                                |
    32 - 64   us |          0 |                                                |
    64 - 128  us |          0 |                                                |
   128 - 256  us |          0 |                                                |
   256 - 512  us |          0 |                                                |
   512 - 1024 us |          0 |                                                |
     1 - 2    ms |          0 |                                                |
     2 - 4    ms |          0 |                                                |
     4 - 8    ms |          0 |                                                |
     8 - 16   ms |          0 |                                                |
    16 - 32   ms |          0 |                                                |
    32 - 64   ms |          0 |                                                |
    64 - 128  ms |          0 |                                                |
   128 - 256  ms |          0 |                                                |
   256 - 512  ms |          0 |                                                |
   512 - 1024 ms |          0 |                                                |
     1 - ...   s |          0 |                                                |
You can get the patches at 'perf/ftrace-latency-v1' branch on
https://git.kernel.org/pub/scm/linux/kernel/git/namhyung/linux-perf.git
Thanks,
Namhyung
Namhyung Kim (5):
perf ftrace: Add 'trace' subcommand
perf ftrace: Move out common code from __cmd_ftrace
perf ftrace: Add 'latency' subcommand
perf ftrace: Add -b/--use-bpf option for latency subcommand
perf ftrace: Implement cpu and task filters in BPF
tools/perf/Makefile.perf | 2 +-
tools/perf/builtin-ftrace.c | 443 +++++++++++++++++---
tools/perf/util/Build | 1 +
tools/perf/util/bpf_ftrace.c | 154 +++++++
tools/perf/util/bpf_skel/func_latency.bpf.c | 113 +++++
tools/perf/util/ftrace.h | 81 ++++
6 files changed, 724 insertions(+), 70 deletions(-)
create mode 100644 tools/perf/util/bpf_ftrace.c
create mode 100644 tools/perf/util/bpf_skel/func_latency.bpf.c
create mode 100644 tools/perf/util/ftrace.h
base-commit: 8ab774587903771821b59471cc723bba6d893942
--
2.34.0.rc2.393.gf8c9666880-goog
The signal setup code and evlist__prepare_workload() can be used for
other subcommands. Let's move them out of __cmd_ftrace(). Then it
doesn't need to take argc and argv anymore.

On the other hand, select_tracer() is specific to the 'trace'
subcommand, so it'd be better to move it into __cmd_ftrace().
Signed-off-by: Namhyung Kim <[email protected]>
---
tools/perf/builtin-ftrace.c | 63 +++++++++++++++++++------------------
1 file changed, 33 insertions(+), 30 deletions(-)
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index b28e762c5d54..0f8310bd0e6c 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -565,7 +565,24 @@ static int set_tracing_options(struct perf_ftrace *ftrace)
return 0;
}
-static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
+static void select_tracer(struct perf_ftrace *ftrace)
+{
+ bool graph = !list_empty(&ftrace->graph_funcs) ||
+ !list_empty(&ftrace->nograph_funcs);
+ bool func = !list_empty(&ftrace->filters) ||
+ !list_empty(&ftrace->notrace);
+
+ /* The function_graph has priority over function tracer. */
+ if (graph)
+ ftrace->tracer = "function_graph";
+ else if (func)
+ ftrace->tracer = "function";
+ /* Otherwise, the default tracer is used. */
+
+ pr_debug("%s tracer is used\n", ftrace->tracer);
+}
+
+static int __cmd_ftrace(struct perf_ftrace *ftrace)
{
char *trace_file;
int trace_fd;
@@ -586,10 +603,7 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
return -1;
}
- signal(SIGINT, sig_handler);
- signal(SIGUSR1, sig_handler);
- signal(SIGCHLD, sig_handler);
- signal(SIGPIPE, sig_handler);
+ select_tracer(ftrace);
if (reset_tracing_files(ftrace) < 0) {
pr_err("failed to reset ftrace\n");
@@ -600,11 +614,6 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
if (write_tracing_file("trace", "0") < 0)
goto out;
- if (argc && evlist__prepare_workload(ftrace->evlist, &ftrace->target, argv, false,
- ftrace__workload_exec_failed_signal) < 0) {
- goto out;
- }
-
if (set_tracing_options(ftrace) < 0)
goto out_reset;
@@ -855,23 +864,6 @@ static int parse_graph_tracer_opts(const struct option *opt,
return 0;
}
-static void select_tracer(struct perf_ftrace *ftrace)
-{
- bool graph = !list_empty(&ftrace->graph_funcs) ||
- !list_empty(&ftrace->nograph_funcs);
- bool func = !list_empty(&ftrace->filters) ||
- !list_empty(&ftrace->notrace);
-
- /* The function_graph has priority over function tracer. */
- if (graph)
- ftrace->tracer = "function_graph";
- else if (func)
- ftrace->tracer = "function";
- /* Otherwise, the default tracer is used. */
-
- pr_debug("%s tracer is used\n", ftrace->tracer);
-}
-
int cmd_ftrace(int argc, const char **argv)
{
int ret;
@@ -937,6 +929,11 @@ int cmd_ftrace(int argc, const char **argv)
INIT_LIST_HEAD(&ftrace.graph_funcs);
INIT_LIST_HEAD(&ftrace.nograph_funcs);
+ signal(SIGINT, sig_handler);
+ signal(SIGUSR1, sig_handler);
+ signal(SIGCHLD, sig_handler);
+ signal(SIGPIPE, sig_handler);
+
ret = perf_config(perf_ftrace_config, &ftrace);
if (ret < 0)
return -1;
@@ -951,8 +948,6 @@ int cmd_ftrace(int argc, const char **argv)
if (!argc && target__none(&ftrace.target))
ftrace.target.system_wide = true;
- select_tracer(&ftrace);
-
ret = target__validate(&ftrace.target);
if (ret) {
char errbuf[512];
@@ -972,7 +967,15 @@ int cmd_ftrace(int argc, const char **argv)
if (ret < 0)
goto out_delete_evlist;
- ret = __cmd_ftrace(&ftrace, argc, argv);
+ if (argc) {
+ ret = evlist__prepare_workload(ftrace.evlist, &ftrace.target,
+ argv, false,
+ ftrace__workload_exec_failed_signal);
+ if (ret < 0)
+ goto out_delete_evlist;
+ }
+
+ ret = __cmd_ftrace(&ftrace);
out_delete_evlist:
evlist__delete(ftrace.evlist);
--
2.34.0.rc2.393.gf8c9666880-goog
The perf ftrace latency command shows a histogram of function
execution time. Users should give a function name using the -T option.

This is implemented using the function_graph tracer with the given
function only, and it parses the tracer output to extract the time.
$ sudo perf ftrace latency -a -T mutex_lock sleep 1
#      DURATION  |      COUNT | GRAPH                                          |
     0 - 1    us |       4596 | ########################                       |
     1 - 2    us |       1680 | #########                                      |
     2 - 4    us |       1106 | #####                                          |
     4 - 8    us |        546 | ##                                             |
     8 - 16   us |        562 | ###                                            |
    16 - 32   us |          1 |                                                |
    32 - 64   us |          0 |                                                |
    64 - 128  us |          0 |                                                |
   128 - 256  us |          0 |                                                |
   256 - 512  us |          0 |                                                |
   512 - 1024 us |          0 |                                                |
     1 - 2    ms |          0 |                                                |
     2 - 4    ms |          0 |                                                |
     4 - 8    ms |          0 |                                                |
     8 - 16   ms |          0 |                                                |
    16 - 32   ms |          0 |                                                |
    32 - 64   ms |          0 |                                                |
    64 - 128  ms |          0 |                                                |
   128 - 256  ms |          0 |                                                |
   256 - 512  ms |          0 |                                                |
   512 - 1024 ms |          0 |                                                |
     1 - ...   s |          0 |                                                |
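
For reference, the bucket index above is derived from log2 of the
duration in usec, truncated to an int and clamped to the histogram
range. A minimal standalone sketch of the mapping (the helper name is
made up for illustration):

  #include <math.h>

  #define NUM_BUCKET 22  /* 20 + 2 (for outliers in both directions) */

  /* map a duration in usec to a bucket: 0-1us, 1-2us, 2-4us, ... */
  static int duration_to_bucket(double usec)
  {
          int i = log2(usec);  /* truncated: 0.5us -> -1, 3us -> 1 */

          if (i < 0)
                  i = 0;  /* everything below 1us goes to bucket 0 */
          if (i >= NUM_BUCKET)
                  i = NUM_BUCKET - 1;  /* clamp outliers to the last bucket */

          return i;
  }

(Link with -lm if building it standalone.)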
Signed-off-by: Namhyung Kim <[email protected]>
---
tools/perf/builtin-ftrace.c | 285 ++++++++++++++++++++++++++++++++++--
1 file changed, 276 insertions(+), 9 deletions(-)
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index 0f8310bd0e6c..8fd3c9c44c69 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -13,7 +13,9 @@
#include <signal.h>
#include <stdlib.h>
#include <fcntl.h>
+#include <math.h>
#include <poll.h>
+#include <ctype.h>
#include <linux/capability.h>
#include <linux/string.h>
@@ -702,6 +704,224 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace)
return (done && !workload_exec_errno) ? 0 : -1;
}
+#define NUM_BUCKET 22 /* 20 + 2 (for outliers in both direction) */
+
+static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf)
+{
+ char *p, *q;
+ char *unit;
+ double num;
+ int i;
+
+ /* ensure NUL termination */
+ buf[len] = '\0';
+
+ /* handle data line by line */
+ for (p = buf; (q = strchr(p, '\n')) != NULL; p = q + 1) {
+ *q = '\0';
+ /* move it to the line buffer */
+ strcat(linebuf, p);
+
+ /*
+ * parse trace output to get function duration like in
+ *
+ * # tracer: function_graph
+ * #
+ * # CPU DURATION FUNCTION CALLS
+ * # | | | | | | |
+ * 1) + 10.291 us | do_filp_open();
+ * 1) 4.889 us | do_filp_open();
+ * 1) 6.086 us | do_filp_open();
+ *
+ */
+ if (linebuf[0] == '#')
+ goto next;
+
+ /* ignore CPU */
+ p = strchr(linebuf, ')');
+ if (p == NULL)
+ p = linebuf;
+
+ while (*p && !isdigit(*p) && (*p != '|'))
+ p++;
+
+ /* no duration */
+ if (*p == '\0' || *p == '|')
+ goto next;
+
+ num = strtod(p, &unit);
+ if (!unit || strncmp(unit, " us", 3))
+ goto next;
+
+ i = log2(num);
+ if (i < 0)
+ i = 0;
+ if (i >= NUM_BUCKET)
+ i = NUM_BUCKET - 1;
+
+ buckets[i]++;
+
+next:
+ /* empty the line buffer for the next output */
+ linebuf[0] = '\0';
+ }
+
+ /* preserve any remaining output (before newline) */
+ strcat(linebuf, p);
+}
+
+static void display_histogram(int buckets[])
+{
+ int i;
+ int total = 0;
+ int bar_total = 46; /* to fit in 80 column */
+ char bar[] = "###############################################";
+ int bar_len;
+
+ for (i = 0; i < NUM_BUCKET; i++)
+ total += buckets[i];
+
+ if (total == 0) {
+ printf("No data found\n");
+ return;
+ }
+
+ printf("# %14s | %10s | %-*s |\n",
+ " DURATION ", "COUNT", bar_total, "GRAPH");
+
+ bar_len = buckets[0] * bar_total / total;
+ printf(" %4d - %-4d %s | %10d | %.*s%*s |\n",
+ 0, 1, "us", buckets[0], bar_len, bar, bar_total - bar_len, "");
+
+ for (i = 1; i < NUM_BUCKET - 1; i++) {
+ int start = (1 << (i - 1));
+ int stop = 1 << i;
+ const char *unit = "us";
+
+ if (start >= 1024) {
+ start >>= 10;
+ stop >>= 10;
+ unit = "ms";
+ }
+ bar_len = buckets[i] * bar_total / total;
+ printf(" %4d - %-4d %s | %10d | %.*s%*s |\n",
+ start, stop, unit, buckets[i], bar_len, bar,
+ bar_total - bar_len, "");
+ }
+
+ bar_len = buckets[NUM_BUCKET - 1] * bar_total / total;
+ printf(" %4d - %-4s %s | %10d | %.*s%*s |\n",
+ 1, "...", " s", buckets[NUM_BUCKET - 1], bar_len, bar,
+ bar_total - bar_len, "");
+
+}
+
+static int __cmd_latency(struct perf_ftrace *ftrace)
+{
+ char *trace_file;
+ int trace_fd;
+ char buf[4096];
+ char line[256];
+ struct pollfd pollfd = {
+ .events = POLLIN,
+ };
+ int buckets[NUM_BUCKET] = { };
+
+ if (!(perf_cap__capable(CAP_PERFMON) ||
+ perf_cap__capable(CAP_SYS_ADMIN))) {
+ pr_err("ftrace only works for %s!\n",
+#ifdef HAVE_LIBCAP_SUPPORT
+ "users with the CAP_PERFMON or CAP_SYS_ADMIN capability"
+#else
+ "root"
+#endif
+ );
+ return -1;
+ }
+
+ if (reset_tracing_files(ftrace) < 0) {
+ pr_err("failed to reset ftrace\n");
+ goto out;
+ }
+
+ /* reset ftrace buffer */
+ if (write_tracing_file("trace", "0") < 0)
+ goto out;
+
+ if (set_tracing_options(ftrace) < 0)
+ goto out_reset;
+
+ /* force to use the function_graph tracer to track duration */
+ if (write_tracing_file("current_tracer", "function_graph") < 0) {
+ pr_err("failed to set current_tracer to function_graph\n");
+ goto out_reset;
+ }
+
+ trace_file = get_tracing_file("trace_pipe");
+ if (!trace_file) {
+ pr_err("failed to open trace_pipe\n");
+ goto out_reset;
+ }
+
+ trace_fd = open(trace_file, O_RDONLY);
+
+ put_tracing_file(trace_file);
+
+ if (trace_fd < 0) {
+ pr_err("failed to open trace_pipe\n");
+ goto out_reset;
+ }
+
+ fcntl(trace_fd, F_SETFL, O_NONBLOCK);
+ pollfd.fd = trace_fd;
+
+ if (write_tracing_file("tracing_on", "1") < 0) {
+ pr_err("can't enable tracing\n");
+ goto out_close_fd;
+ }
+
+ evlist__start_workload(ftrace->evlist);
+
+ line[0] = '\0';
+ while (!done) {
+ if (poll(&pollfd, 1, -1) < 0)
+ break;
+
+ if (pollfd.revents & POLLIN) {
+ int n = read(trace_fd, buf, sizeof(buf) - 1);
+ if (n < 0)
+ break;
+
+ make_histogram(buckets, buf, n, line);
+ }
+ }
+
+ write_tracing_file("tracing_on", "0");
+
+ if (workload_exec_errno) {
+ const char *emsg = str_error_r(workload_exec_errno, buf, sizeof(buf));
+ pr_err("workload failed: %s\n", emsg);
+ goto out_close_fd;
+ }
+
+ /* read remaining buffer contents */
+ while (true) {
+ int n = read(trace_fd, buf, sizeof(buf) - 1);
+ if (n <= 0)
+ break;
+ make_histogram(buckets, buf, n, line);
+ }
+
+ display_histogram(buckets);
+
+out_close_fd:
+ close(trace_fd);
+out_reset:
+ reset_tracing_files(ftrace);
+out:
+ return (done && !workload_exec_errno) ? 0 : -1;
+}
+
static int perf_ftrace_config(const char *var, const char *value, void *cb)
{
struct perf_ftrace *ftrace = cb;
@@ -864,6 +1084,12 @@ static int parse_graph_tracer_opts(const struct option *opt,
return 0;
}
+enum perf_ftrace_subcommand {
+ PERF_FTRACE_NONE,
+ PERF_FTRACE_TRACE,
+ PERF_FTRACE_LATENCY,
+};
+
int cmd_ftrace(int argc, const char **argv)
{
int ret;
@@ -915,14 +1141,21 @@ int cmd_ftrace(int argc, const char **argv)
"Number of milliseconds to wait before starting tracing after program start"),
OPT_PARENT(common_options),
};
+ const struct option latency_options[] = {
+ OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
+ "Show latency of given function", parse_filter_func),
+ OPT_PARENT(common_options),
+ };
+ const struct option *options = ftrace_options;
const char * const ftrace_usage[] = {
"perf ftrace [<options>] [<command>]",
"perf ftrace [<options>] -- [<command>] [<options>]",
- "perf ftrace trace [<options>] [<command>]",
- "perf ftrace trace [<options>] -- [<command>] [<options>]",
+ "perf ftrace {trace|latency} [<options>] [<command>]",
+ "perf ftrace {trace|latency} [<options>] -- [<command>] [<options>]",
NULL
};
+ enum perf_ftrace_subcommand subcmd = PERF_FTRACE_NONE;
INIT_LIST_HEAD(&ftrace.filters);
INIT_LIST_HEAD(&ftrace.notrace);
@@ -938,15 +1171,29 @@ int cmd_ftrace(int argc, const char **argv)
if (ret < 0)
return -1;
- if (argc > 1 && !strcmp(argv[1], "trace")) {
- argc--;
- argv++;
+ if (argc > 1) {
+ if (!strcmp(argv[1], "trace")) {
+ subcmd = PERF_FTRACE_TRACE;
+ } else if (!strcmp(argv[1], "latency")) {
+ subcmd = PERF_FTRACE_LATENCY;
+ options = latency_options;
+ }
+
+ if (subcmd != PERF_FTRACE_NONE) {
+ argc--;
+ argv++;
+ }
}
+ /* for backward compatibility */
+ if (subcmd == PERF_FTRACE_NONE)
+ subcmd = PERF_FTRACE_TRACE;
- argc = parse_options(argc, argv, ftrace_options, ftrace_usage,
+ argc = parse_options(argc, argv, options, ftrace_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
- if (!argc && target__none(&ftrace.target))
- ftrace.target.system_wide = true;
+ if (argc < 0) {
+ ret = -EINVAL;
+ goto out_delete_filters;
+ }
ret = target__validate(&ftrace.target);
if (ret) {
@@ -975,7 +1222,27 @@ int cmd_ftrace(int argc, const char **argv)
goto out_delete_evlist;
}
- ret = __cmd_ftrace(&ftrace);
+ switch (subcmd) {
+ case PERF_FTRACE_TRACE:
+ if (!argc && target__none(&ftrace.target))
+ ftrace.target.system_wide = true;
+ ret = __cmd_ftrace(&ftrace);
+ break;
+ case PERF_FTRACE_LATENCY:
+ if (list_empty(&ftrace.filters)) {
+ pr_err("Should provide a function to measure\n");
+ parse_options_usage(ftrace_usage, options, "T", 1);
+ ret = -EINVAL;
+ goto out_delete_evlist;
+ }
+ ret = __cmd_latency(&ftrace);
+ break;
+ case PERF_FTRACE_NONE:
+ default:
+ pr_err("Invalid subcommand\n");
+ ret = -EINVAL;
+ break;
+ }
out_delete_evlist:
evlist__delete(ftrace.evlist);
--
2.34.0.rc2.393.gf8c9666880-goog
Honor the cpu and task options to set up filters (by pid or tid) in the
BPF program. For example, the following command will show the latency
of mutex_lock for process 2570.
# perf ftrace latency -b -T mutex_lock -p 2570 sleep 3
#      DURATION  |      COUNT | GRAPH                                          |
     0 - 1    us |        675 | ##############################                 |
     1 - 2    us |          9 |                                                |
     2 - 4    us |          0 |                                                |
     4 - 8    us |          0 |                                                |
     8 - 16   us |          0 |                                                |
    16 - 32   us |          0 |                                                |
    32 - 64   us |          0 |                                                |
    64 - 128  us |          0 |                                                |
   128 - 256  us |          0 |                                                |
   256 - 512  us |          0 |                                                |
   512 - 1024 us |          0 |                                                |
     1 - 2    ms |          0 |                                                |
     2 - 4    ms |          0 |                                                |
     4 - 8    ms |          0 |                                                |
     8 - 16   ms |          0 |                                                |
    16 - 32   ms |          0 |                                                |
    32 - 64   ms |          0 |                                                |
    64 - 128  ms |          0 |                                                |
   128 - 256  ms |          0 |                                                |
   256 - 512  ms |          0 |                                                |
   512 - 1024 ms |          0 |                                                |
     1 - ...   s |          0 |                                                |
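
A cpu filter can be set up the same way through the common -C option,
e.g. (illustrative invocation, output omitted):

  # perf ftrace latency -b -T mutex_lock -C 0-3 sleep 3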
Signed-off-by: Namhyung Kim <[email protected]>
---
tools/perf/util/bpf_ftrace.c | 41 +++++++++++++++++++++
tools/perf/util/bpf_skel/func_latency.bpf.c | 21 +++++++++++
2 files changed, 62 insertions(+)
diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
index 1975a6fe73c9..18d6f1558163 100644
--- a/tools/perf/util/bpf_ftrace.c
+++ b/tools/perf/util/bpf_ftrace.c
@@ -6,7 +6,10 @@
#include <linux/err.h>
#include "util/ftrace.h"
+#include "util/cpumap.h"
+#include "util/thread_map.h"
#include "util/debug.h"
+#include "util/evlist.h"
#include "util/bpf_counter.h"
#include "util/bpf_skel/func_latency.skel.h"
@@ -16,6 +19,7 @@ static struct func_latency_bpf *skel;
int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
{
int fd, err;
+ int i, ncpus = 1, ntasks = 1;
struct filter_entry *func;
struct bpf_link *begin_link, *end_link;
@@ -33,6 +37,17 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
return -1;
}
+ /* don't need to set cpu filter for system-wide mode */
+ if (ftrace->target.cpu_list) {
+ ncpus = perf_cpu_map__nr(ftrace->evlist->core.cpus);
+ bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
+ }
+
+ if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
+ ntasks = perf_thread_map__nr(ftrace->evlist->core.threads);
+ bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
+ }
+
set_max_rlimit();
err = func_latency_bpf__load(skel);
@@ -41,6 +56,32 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
goto out;
}
+ if (ftrace->target.cpu_list) {
+ u32 cpu;
+ u8 val = 1;
+
+ skel->bss->has_cpu = 1;
+ fd = bpf_map__fd(skel->maps.cpu_filter);
+
+ for (i = 0; i < ncpus; i++) {
+ cpu = cpu_map__cpu(ftrace->evlist->core.cpus, i);
+ bpf_map_update_elem(fd, &cpu, &val, BPF_ANY);
+ }
+ }
+
+ if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
+ u32 pid;
+ u8 val = 1;
+
+ skel->bss->has_task = 1;
+ fd = bpf_map__fd(skel->maps.task_filter);
+
+ for (i = 0; i < ntasks; i++) {
+ pid = perf_thread_map__pid(ftrace->evlist->core.threads, i);
+ bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
+ }
+ }
+
begin_link = bpf_program__attach_kprobe(skel->progs.func_begin,
false, func->name);
if (IS_ERR(begin_link)) {
diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c
index d7d31cfeabf8..17a2a53b4f16 100644
--- a/tools/perf/util/bpf_skel/func_latency.bpf.c
+++ b/tools/perf/util/bpf_skel/func_latency.bpf.c
@@ -36,6 +36,8 @@ struct {
int enabled = 0;
+int has_cpu = 0;
+int has_task = 0;
SEC("kprobe/func")
int BPF_PROG(func_begin)
@@ -46,6 +48,25 @@ int BPF_PROG(func_begin)
return 0;
key = bpf_get_current_pid_tgid();
+
+ if (has_cpu) {
+ __u32 cpu = bpf_get_smp_processor_id();
+ __u8 *ok;
+
+ ok = bpf_map_lookup_elem(&cpu_filter, &cpu);
+ if (!ok)
+ return 0;
+ }
+
+ if (has_task) {
+ __u32 pid = key & 0xffffffff;
+ __u8 *ok;
+
+ ok = bpf_map_lookup_elem(&task_filter, &pid);
+ if (!ok)
+ return 0;
+ }
+
now = bpf_ktime_get_ns();
// overwrite timestamp for nested functions
--
2.34.0.rc2.393.gf8c9666880-goog
The -b/--use-bpf option makes it use BPF to get the latency info of
kernel functions. It should have a smaller performance impact, and I
observed that the latency of the same function is smaller than before
when using BPF.
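
For example, the two modes are selected like below (same commands as in
the cover letter; the trailing comments are mine):

  $ sudo ./perf ftrace latency -a -T mutex_lock sleep 1     # parse trace_pipe in user space
  $ sudo ./perf ftrace latency -a -b -T mutex_lock sleep 1  # aggregate in a BPF map in the kernel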
Signed-off-by: Namhyung Kim <[email protected]>
---
tools/perf/Makefile.perf | 2 +-
tools/perf/builtin-ftrace.c | 156 +++++++++++---------
tools/perf/util/Build | 1 +
tools/perf/util/bpf_ftrace.c | 113 ++++++++++++++
tools/perf/util/bpf_skel/func_latency.bpf.c | 92 ++++++++++++
tools/perf/util/ftrace.h | 81 ++++++++++
6 files changed, 378 insertions(+), 67 deletions(-)
create mode 100644 tools/perf/util/bpf_ftrace.c
create mode 100644 tools/perf/util/bpf_skel/func_latency.bpf.c
create mode 100644 tools/perf/util/ftrace.h
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 80522bcfafe0..a861b92ac6f9 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -1041,7 +1041,7 @@ SKEL_OUT := $(abspath $(OUTPUT)util/bpf_skel)
SKEL_TMP_OUT := $(abspath $(SKEL_OUT)/.tmp)
SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h
SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h
-SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h
+SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h
$(SKEL_TMP_OUT) $(LIBBPF_OUTPUT):
$(Q)$(MKDIR) -p $@
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index 8fd3c9c44c69..cde1e87032a1 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -30,36 +30,12 @@
#include "strfilter.h"
#include "util/cap.h"
#include "util/config.h"
+#include "util/ftrace.h"
#include "util/units.h"
#include "util/parse-sublevel-options.h"
#define DEFAULT_TRACER "function_graph"
-struct perf_ftrace {
- struct evlist *evlist;
- struct target target;
- const char *tracer;
- struct list_head filters;
- struct list_head notrace;
- struct list_head graph_funcs;
- struct list_head nograph_funcs;
- int graph_depth;
- unsigned long percpu_buffer_size;
- bool inherit;
- int func_stack_trace;
- int func_irq_info;
- int graph_nosleep_time;
- int graph_noirqs;
- int graph_verbose;
- int graph_thresh;
- unsigned int initial_delay;
-};
-
-struct filter_entry {
- struct list_head list;
- char name[];
-};
-
static volatile int workload_exec_errno;
static bool done;
@@ -704,8 +680,6 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace)
return (done && !workload_exec_errno) ? 0 : -1;
}
-#define NUM_BUCKET 22 /* 20 + 2 (for outliers in both direction) */
-
static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf)
{
char *p, *q;
@@ -816,69 +790,116 @@ static void display_histogram(int buckets[])
}
-static int __cmd_latency(struct perf_ftrace *ftrace)
+static int prepare_func_latency(struct perf_ftrace *ftrace)
{
char *trace_file;
- int trace_fd;
- char buf[4096];
- char line[256];
- struct pollfd pollfd = {
- .events = POLLIN,
- };
- int buckets[NUM_BUCKET] = { };
+ int fd;
- if (!(perf_cap__capable(CAP_PERFMON) ||
- perf_cap__capable(CAP_SYS_ADMIN))) {
- pr_err("ftrace only works for %s!\n",
-#ifdef HAVE_LIBCAP_SUPPORT
- "users with the CAP_PERFMON or CAP_SYS_ADMIN capability"
-#else
- "root"
-#endif
- );
- return -1;
- }
+ if (ftrace->target.use_bpf)
+ return perf_ftrace__latency_prepare_bpf(ftrace);
if (reset_tracing_files(ftrace) < 0) {
pr_err("failed to reset ftrace\n");
- goto out;
+ return -1;
}
/* reset ftrace buffer */
if (write_tracing_file("trace", "0") < 0)
- goto out;
+ return -1;
if (set_tracing_options(ftrace) < 0)
- goto out_reset;
+ return -1;
/* force to use the function_graph tracer to track duration */
if (write_tracing_file("current_tracer", "function_graph") < 0) {
pr_err("failed to set current_tracer to function_graph\n");
- goto out_reset;
+ return -1;
}
trace_file = get_tracing_file("trace_pipe");
if (!trace_file) {
pr_err("failed to open trace_pipe\n");
- goto out_reset;
+ return -1;
}
- trace_fd = open(trace_file, O_RDONLY);
+ fd = open(trace_file, O_RDONLY);
+ if (fd < 0)
+ pr_err("failed to open trace_pipe\n");
put_tracing_file(trace_file);
+ return fd;
+}
- if (trace_fd < 0) {
- pr_err("failed to open trace_pipe\n");
- goto out_reset;
+static int start_func_latency(struct perf_ftrace *ftrace)
+{
+ if (ftrace->target.use_bpf)
+ return perf_ftrace__latency_start_bpf(ftrace);
+
+ if (write_tracing_file("tracing_on", "1") < 0) {
+ pr_err("can't enable tracing\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int stop_func_latency(struct perf_ftrace *ftrace)
+{
+ if (ftrace->target.use_bpf)
+ return perf_ftrace__latency_stop_bpf(ftrace);
+
+ write_tracing_file("tracing_on", "0");
+ return 0;
+}
+
+static int read_func_latency(struct perf_ftrace *ftrace, int buckets[])
+{
+ if (ftrace->target.use_bpf)
+ return perf_ftrace__latency_read_bpf(ftrace, buckets);
+
+ return 0;
+}
+
+static int cleanup_func_latency(struct perf_ftrace *ftrace)
+{
+ if (ftrace->target.use_bpf)
+ return perf_ftrace__latency_cleanup_bpf(ftrace);
+
+ reset_tracing_files(ftrace);
+ return 0;
+}
+
+static int __cmd_latency(struct perf_ftrace *ftrace)
+{
+ int trace_fd;
+ char buf[4096];
+ char line[256];
+ struct pollfd pollfd = {
+ .events = POLLIN,
+ };
+ int buckets[NUM_BUCKET] = { };
+
+ if (!(perf_cap__capable(CAP_PERFMON) ||
+ perf_cap__capable(CAP_SYS_ADMIN))) {
+ pr_err("ftrace only works for %s!\n",
+#ifdef HAVE_LIBCAP_SUPPORT
+ "users with the CAP_PERFMON or CAP_SYS_ADMIN capability"
+#else
+ "root"
+#endif
+ );
+ return -1;
}
+ trace_fd = prepare_func_latency(ftrace);
+ if (trace_fd < 0)
+ goto out;
+
fcntl(trace_fd, F_SETFL, O_NONBLOCK);
pollfd.fd = trace_fd;
- if (write_tracing_file("tracing_on", "1") < 0) {
- pr_err("can't enable tracing\n");
- goto out_close_fd;
- }
+ if (start_func_latency(ftrace) < 0)
+ goto out;
evlist__start_workload(ftrace->evlist);
@@ -896,29 +917,30 @@ static int __cmd_latency(struct perf_ftrace *ftrace)
}
}
- write_tracing_file("tracing_on", "0");
+ stop_func_latency(ftrace);
if (workload_exec_errno) {
const char *emsg = str_error_r(workload_exec_errno, buf, sizeof(buf));
pr_err("workload failed: %s\n", emsg);
- goto out_close_fd;
+ goto out;
}
/* read remaining buffer contents */
- while (true) {
+ while (!ftrace->target.use_bpf) {
int n = read(trace_fd, buf, sizeof(buf) - 1);
if (n <= 0)
break;
make_histogram(buckets, buf, n, line);
}
+ read_func_latency(ftrace, buckets);
+
display_histogram(buckets);
-out_close_fd:
- close(trace_fd);
-out_reset:
- reset_tracing_files(ftrace);
out:
+ close(trace_fd);
+ cleanup_func_latency(ftrace);
+
return (done && !workload_exec_errno) ? 0 : -1;
}
@@ -1144,6 +1166,8 @@ int cmd_ftrace(int argc, const char **argv)
const struct option latency_options[] = {
OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
"Show latency of given function", parse_filter_func),
+ OPT_BOOLEAN('b', "use-bpf", &ftrace.target.use_bpf,
+ "Use BPF to measure function latency"),
OPT_PARENT(common_options),
};
const struct option *options = ftrace_options;
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 2e5bfbb69960..294b12430d73 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -144,6 +144,7 @@ perf-$(CONFIG_LIBBPF) += bpf-loader.o
perf-$(CONFIG_LIBBPF) += bpf_map.o
perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o
perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o
+perf-$(CONFIG_PERF_BPF_SKEL) += bpf_ftrace.o
perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
perf-$(CONFIG_LIBELF) += symbol-elf.o
perf-$(CONFIG_LIBELF) += probe-file.o
diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
new file mode 100644
index 000000000000..1975a6fe73c9
--- /dev/null
+++ b/tools/perf/util/bpf_ftrace.c
@@ -0,0 +1,113 @@
+#include <stdio.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <linux/err.h>
+
+#include "util/ftrace.h"
+#include "util/debug.h"
+#include "util/bpf_counter.h"
+
+#include "util/bpf_skel/func_latency.skel.h"
+
+static struct func_latency_bpf *skel;
+
+int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
+{
+ int fd, err;
+ struct filter_entry *func;
+ struct bpf_link *begin_link, *end_link;
+
+ if (!list_is_singular(&ftrace->filters)) {
+ pr_err("ERROR: %s target function(s).\n",
+ list_empty(&ftrace->filters) ? "No" : "Too many");
+ return -1;
+ }
+
+ func = list_first_entry(&ftrace->filters, struct filter_entry, list);
+
+ skel = func_latency_bpf__open();
+ if (!skel) {
+ pr_err("Failed to open func latency skeleton\n");
+ return -1;
+ }
+
+ set_max_rlimit();
+
+ err = func_latency_bpf__load(skel);
+ if (err) {
+ pr_err("Failed to load func latency skeleton\n");
+ goto out;
+ }
+
+ begin_link = bpf_program__attach_kprobe(skel->progs.func_begin,
+ false, func->name);
+ if (IS_ERR(begin_link)) {
+ pr_err("Failed to attach fentry program\n");
+ err = PTR_ERR(begin_link);
+ goto out;
+ }
+
+ end_link = bpf_program__attach_kprobe(skel->progs.func_end,
+ true, func->name);
+ if (IS_ERR(end_link)) {
+ pr_err("Failed to attach fexit program\n");
+ err = PTR_ERR(end_link);
+ bpf_link__destroy(begin_link);
+ goto out;
+ }
+
+ /* XXX: we don't actually use this fd - just for poll() */
+ return open("/dev/null", O_RDONLY);
+
+out:
+ return err;
+}
+
+int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ skel->bss->enabled = 1;
+ return 0;
+}
+
+int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ skel->bss->enabled = 0;
+ return 0;
+}
+
+int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
+ int buckets[])
+{
+ int i, fd, err;
+ u32 idx;
+ u64 *hist;
+ int ncpus = cpu__max_cpu();
+
+ fd = bpf_map__fd(skel->maps.latency);
+
+ hist = calloc(ncpus, sizeof(*hist));
+ if (hist == NULL)
+ return -ENOMEM;
+
+ for (idx = 0; idx < NUM_BUCKET; idx++) {
+ err = bpf_map_lookup_elem(fd, &idx, hist);
+ if (err) {
+ buckets[idx] = 0;
+ continue;
+ }
+
+ for (i = 0; i < ncpus; i++)
+ buckets[idx] += hist[i];
+ }
+
+ free(hist);
+ return 0;
+}
+
+int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ func_latency_bpf__destroy(skel);
+ return 0;
+}
diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c
new file mode 100644
index 000000000000..d7d31cfeabf8
--- /dev/null
+++ b/tools/perf/util/bpf_skel/func_latency.bpf.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2021 Google
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define NUM_BUCKET 22
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u64));
+ __uint(value_size, sizeof(__u64));
+ __uint(max_entries, 10000);
+} functime SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u8));
+ __uint(max_entries, 1);
+} cpu_filter SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u8));
+ __uint(max_entries, 1);
+} task_filter SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u64));
+ __uint(max_entries, NUM_BUCKET);
+} latency SEC(".maps");
+
+
+int enabled = 0;
+
+SEC("kprobe/func")
+int BPF_PROG(func_begin)
+{
+ __u64 key, now;
+
+ if (!enabled)
+ return 0;
+
+ key = bpf_get_current_pid_tgid();
+ now = bpf_ktime_get_ns();
+
+ // overwrite timestamp for nested functions
+ bpf_map_update_elem(&functime, &key, &now, BPF_ANY);
+ return 0;
+}
+
+SEC("kretprobe/func")
+int BPF_PROG(func_end)
+{
+ __u64 tid;
+ __u64 *start;
+
+ if (!enabled)
+ return 0;
+
+ tid = bpf_get_current_pid_tgid();
+
+ start = bpf_map_lookup_elem(&functime, &tid);
+ if (start) {
+ __s64 delta = bpf_ktime_get_ns() - *start;
+ __u32 key;
+ __u64 *hist;
+
+ bpf_map_delete_elem(&functime, &tid);
+
+ if (delta < 0)
+ return 0;
+
+ // calculate index using delta in usec
+ for (key = 0; key < (NUM_BUCKET - 1); key++) {
+ if (delta < ((1000UL) << key))
+ break;
+ }
+
+ hist = bpf_map_lookup_elem(&latency, &key);
+ if (!hist)
+ return 0;
+
+ *hist += 1;
+ }
+
+ return 0;
+}
diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h
new file mode 100644
index 000000000000..887f68a185f7
--- /dev/null
+++ b/tools/perf/util/ftrace.h
@@ -0,0 +1,81 @@
+#ifndef __PERF_FTRACE_H__
+#define __PERF_FTRACE_H__
+
+#include <linux/list.h>
+
+#include "target.h"
+
+struct evlist;
+
+struct perf_ftrace {
+ struct evlist *evlist;
+ struct target target;
+ const char *tracer;
+ struct list_head filters;
+ struct list_head notrace;
+ struct list_head graph_funcs;
+ struct list_head nograph_funcs;
+ unsigned long percpu_buffer_size;
+ bool inherit;
+ int graph_depth;
+ int func_stack_trace;
+ int func_irq_info;
+ int graph_nosleep_time;
+ int graph_noirqs;
+ int graph_verbose;
+ int graph_thresh;
+ unsigned int initial_delay;
+};
+
+struct filter_entry {
+ struct list_head list;
+ char name[];
+};
+
+#define NUM_BUCKET 22 /* 20 + 2 (for outliers in both direction) */
+
+#ifdef HAVE_BPF_SKEL
+
+int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace);
+int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace);
+int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace);
+int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace,
+ int buckets[]);
+int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace);
+
+#else /* !HAVE_BPF_SKEL */
+
+static inline int
+perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ return -1;
+}
+
+static inline int
+perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ return -1;
+}
+
+static inline int
+perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ return -1;
+}
+
+static inline int
+perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
+ int buckets[] __maybe_unused)
+{
+ return -1;
+}
+
+static inline int
+perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ return -1;
+}
+
+#endif /* HAVE_BPF_SKEL */
+
+#endif /* __PERF_FTRACE_H__ */
--
2.34.0.rc2.393.gf8c9666880-goog
This is a preparation to add more subcommands to perf ftrace. The
'trace' subcommand does the same thing as when no subcommand is given.
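
For example, the following two invocations should behave identically
(illustrative):

  $ perf ftrace -a -- sleep 1
  $ perf ftrace trace -a -- sleep 1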
Signed-off-by: Namhyung Kim <[email protected]>
---
tools/perf/builtin-ftrace.c | 35 +++++++++++++++++++++++------------
1 file changed, 23 insertions(+), 12 deletions(-)
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index 87cb11a7a3ee..b28e762c5d54 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -879,17 +879,7 @@ int cmd_ftrace(int argc, const char **argv)
.tracer = DEFAULT_TRACER,
.target = { .uid = UINT_MAX, },
};
- const char * const ftrace_usage[] = {
- "perf ftrace [<options>] [<command>]",
- "perf ftrace [<options>] -- <command> [<options>]",
- NULL
- };
- const struct option ftrace_options[] = {
- OPT_STRING('t', "tracer", &ftrace.tracer, "tracer",
- "Tracer to use: function_graph(default) or function"),
- OPT_CALLBACK_DEFAULT('F', "funcs", NULL, "[FILTER]",
- "Show available functions to filter",
- opt_list_avail_functions, "*"),
+ const struct option common_options[] = {
OPT_STRING('p', "pid", &ftrace.target.pid, "pid",
"Trace on existing process id"),
/* TODO: Add short option -t after -t/--tracer can be removed. */
@@ -901,6 +891,14 @@ int cmd_ftrace(int argc, const char **argv)
"System-wide collection from all CPUs"),
OPT_STRING('C', "cpu", &ftrace.target.cpu_list, "cpu",
"List of cpus to monitor"),
+ OPT_END()
+ };
+ const struct option ftrace_options[] = {
+ OPT_STRING('t', "tracer", &ftrace.tracer, "tracer",
+ "Tracer to use: function_graph(default) or function"),
+ OPT_CALLBACK_DEFAULT('F', "funcs", NULL, "[FILTER]",
+ "Show available functions to filter",
+ opt_list_avail_functions, "*"),
OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
"Trace given functions using function tracer",
parse_filter_func),
@@ -923,7 +921,15 @@ int cmd_ftrace(int argc, const char **argv)
"Trace children processes"),
OPT_UINTEGER('D', "delay", &ftrace.initial_delay,
"Number of milliseconds to wait before starting tracing after program start"),
- OPT_END()
+ OPT_PARENT(common_options),
+ };
+
+ const char * const ftrace_usage[] = {
+ "perf ftrace [<options>] [<command>]",
+ "perf ftrace [<options>] -- [<command>] [<options>]",
+ "perf ftrace trace [<options>] [<command>]",
+ "perf ftrace trace [<options>] -- [<command>] [<options>]",
+ NULL
};
INIT_LIST_HEAD(&ftrace.filters);
@@ -935,6 +941,11 @@ int cmd_ftrace(int argc, const char **argv)
if (ret < 0)
return -1;
+ if (argc > 1 && !strcmp(argv[1], "trace")) {
+ argc--;
+ argv++;
+ }
+
argc = parse_options(argc, argv, ftrace_options, ftrace_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
if (!argc && target__none(&ftrace.target))
--
2.34.0.rc2.393.gf8c9666880-goog
Em Mon, Nov 29, 2021 at 03:18:25PM -0800, Namhyung Kim escreveu:
> Hello,
>
> I've implemented a 'latency' subcommand in the perf ftrace command to
> show a histogram of function latency.
>
> To handle new subcommands, the existing functionality is moved to a
> 'trace' subcommand while preserving backward compatibility of not
> having a subcommand at all (defaults to 'trace').
>
> The latency subcommand accepts a target (kernel, for now) function
> with the -T option and shows a histogram like below:
Humm, wouldn't it be interesting to shorten this by having a new 'perf
flat' (function latency) tool, on the same level as 'perf ftrace', and
leave 'perf ftrace' to just be a convenient perf interface to what
ftrace provides?
But all around, nice work, cool new toyz! :-)
- Arnaldo
> $ sudo ./perf ftrace latency -a -T mutex_lock sleep 1
> #      DURATION  |      COUNT | GRAPH                                          |
>      0 - 1    us |       2686 | ######################                         |
>      1 - 2    us |        976 | ########                                       |
>      2 - 4    us |        879 | #######                                        |
>      4 - 8    us |        481 | ####                                           |
>      8 - 16   us |        445 | ###                                            |
>     16 - 32   us |          1 |                                                |
>     32 - 64   us |          0 |                                                |
>     64 - 128  us |          0 |                                                |
>    128 - 256  us |          0 |                                                |
>    256 - 512  us |          0 |                                                |
>    512 - 1024 us |          0 |                                                |
>      1 - 2    ms |          0 |                                                |
>      2 - 4    ms |          0 |                                                |
>      4 - 8    ms |          0 |                                                |
>      8 - 16   ms |          0 |                                                |
>     16 - 32   ms |          0 |                                                |
>     32 - 64   ms |          0 |                                                |
>     64 - 128  ms |          0 |                                                |
>    128 - 256  ms |          0 |                                                |
>    256 - 512  ms |          0 |                                                |
>    512 - 1024 ms |          0 |                                                |
>      1 - ...   s |          0 |                                                |
>
> It basically uses the function_graph tracer to extract the duration of
> the function. But with the -b/--use-bpf option, it can use BPF to save
> the histogram in the kernel. For the same function, it gets:
>
> $ sudo ./perf ftrace latency -a -b -T mutex_lock sleep 1
> #      DURATION  |      COUNT | GRAPH                                          |
>      0 - 1    us |       4682 | #############################################  |
>      1 - 2    us |         11 |                                                |
>      2 - 4    us |          0 |                                                |
>      4 - 8    us |          0 |                                                |
>      8 - 16   us |          7 |                                                |
>     16 - 32   us |          6 |                                                |
>     32 - 64   us |          0 |                                                |
>     64 - 128  us |          0 |                                                |
>    128 - 256  us |          0 |                                                |
>    256 - 512  us |          0 |                                                |
>    512 - 1024 us |          0 |                                                |
>      1 - 2    ms |          0 |                                                |
>      2 - 4    ms |          0 |                                                |
>      4 - 8    ms |          0 |                                                |
>      8 - 16   ms |          0 |                                                |
>     16 - 32   ms |          0 |                                                |
>     32 - 64   ms |          0 |                                                |
>     64 - 128  ms |          0 |                                                |
>    128 - 256  ms |          0 |                                                |
>    256 - 512  ms |          0 |                                                |
>    512 - 1024 ms |          0 |                                                |
>      1 - ...   s |          0 |                                                |
>
>
> You can get the patches at 'perf/ftrace-latency-v1' branch on
>
> https://git.kernel.org/pub/scm/linux/kernel/git/namhyung/linux-perf.git
>
>
> Thanks,
> Namhyung
>
>
> Namhyung Kim (5):
> perf ftrace: Add 'trace' subcommand
> perf ftrace: Move out common code from __cmd_ftrace
> perf ftrace: Add 'latency' subcommand
> perf ftrace: Add -b/--use-bpf option for latency subcommand
> perf ftrace: Implement cpu and task filters in BPF
>
> tools/perf/Makefile.perf | 2 +-
> tools/perf/builtin-ftrace.c | 443 +++++++++++++++++---
> tools/perf/util/Build | 1 +
> tools/perf/util/bpf_ftrace.c | 154 +++++++
> tools/perf/util/bpf_skel/func_latency.bpf.c | 113 +++++
> tools/perf/util/ftrace.h | 81 ++++
> 6 files changed, 724 insertions(+), 70 deletions(-)
> create mode 100644 tools/perf/util/bpf_ftrace.c
> create mode 100644 tools/perf/util/bpf_skel/func_latency.bpf.c
> create mode 100644 tools/perf/util/ftrace.h
>
>
> base-commit: 8ab774587903771821b59471cc723bba6d893942
> --
> 2.34.0.rc2.393.gf8c9666880-goog
--
- Arnaldo
Hi Arnaldo,
On Tue, Nov 30, 2021 at 6:37 AM Arnaldo Carvalho de Melo
<[email protected]> wrote:
>
> Em Mon, Nov 29, 2021 at 03:18:25PM -0800, Namhyung Kim escreveu:
> > Hello,
> >
> > I've implemented a 'latency' subcommand in the perf ftrace command to
> > show a histogram of function latency.
> >
> > To handle new subcommands, the existing functionality is moved to a
> > 'trace' subcommand while preserving backward compatibility of not
> > having a subcommand at all (defaults to 'trace').
> >
> > The latency subcommand accepts a target (kernel, for now) function
> > with the -T option and shows a histogram like below:
>
> Humm, wouldn't it be interesting to shorten this by having a new 'perf
> flat' (function latency) tool, on the same level as 'perf ftrace', and
> leave 'perf ftrace' to just be a convenient perf interface to what
> ftrace provides?
That would be fine. I also think 'perf ftrace latency' is
a bit too long. But if we add a new feature
like argdist (as in BCC) later, I thought it'd be nice
to have them together as subcommands of perf ftrace.
But it's up to you. I'll make a change if you prefer
'flat' (or how about 'fnlat' instead?).
>
> But all around, nice work, cool new toyz! :-)
Thanks!
Namhyung
On Tue, Nov 30, 2021 at 2:58 PM Namhyung Kim <[email protected]> wrote:
>
> Hi Arnaldo,
>
> On Tue, Nov 30, 2021 at 6:37 AM Arnaldo Carvalho de Melo
> <[email protected]> wrote:
> >
> > Em Mon, Nov 29, 2021 at 03:18:25PM -0800, Namhyung Kim escreveu:
> > > Hello,
> > >
> > > > I've implemented a 'latency' subcommand in the perf ftrace command to
> > > > show a histogram of function latency.
> > > >
> > > > To handle new subcommands, the existing functionality is moved to a
> > > > 'trace' subcommand while preserving backward compatibility of not
> > > > having a subcommand at all (defaults to 'trace').
> > > >
> > > > The latency subcommand accepts a target (kernel, for now) function
> > > > with the -T option and shows a histogram like below:
> >
> > Humm, wouldn't it be interesting to shorten this by having a new 'perf
> > flat' (function latency) tool, on the same level as 'perf ftrace', and
> > leave 'perf ftrace' to just be a convenient perf interface to what
> > ftrace provides?
>
> That would be fine. I also think 'perf ftrace latency' is
> a bit too long. But if we add a new feature
> like argdist (as in BCC) later, I thought it'd be nice
> to have them together as subcommands of perf ftrace.
>
> But it's up to you. I'll make a change if you prefer
> 'flat' (or how about 'fnlat' instead?).
>
I am not too fond of the 'flat' option because, as we add more BPF
tools like function latency, we would keep extending the list of
commands, each with a small scope, which is different from what we
have right now.
Em Tue, Nov 30, 2021 at 04:36:49PM -0800, Stephane Eranian escreveu:
> On Tue, Nov 30, 2021 at 2:58 PM Namhyung Kim <[email protected]> wrote:
> > On Tue, Nov 30, 2021 at 6:37 AM Arnaldo Carvalho de Melo <[email protected]> wrote:
> > > Em Mon, Nov 29, 2021 at 03:18:25PM -0800, Namhyung Kim escreveu:
> > > > I've implemented a 'latency' subcommand in the perf ftrace command to
> > > > show a histogram of function latency.
> > > > To handle new subcommands, the existing functionality is moved to a
> > > > 'trace' subcommand while preserving backward compatibility of not
> > > > having a subcommand at all (defaults to 'trace').
> > > > The latency subcommand accepts a target (kernel, for now) function
> > > > with the -T option and shows a histogram like below:
> > > Humm, wouldn't it be interesting to shorten this by having a new 'perf
> > > flat' (function latency) tool, on the same level as 'perf ftrace', and
> > > leave 'perf ftrace' to just be a convenient perf interface to what
> > > ftrace provides?
> > That would be fine. I also think 'perf ftrace latency' is
> > a bit too long. But if we add a new feature
> > like argdist (as in BCC) later, I thought it'd be nice
> > to have them together as subcommands of perf ftrace.
> > But it's up to you. I'll make a change if you prefer
> > 'flat' (or how about 'fnlat' instead?).
fnlat would be ok, flat was just funny to avoid suggesting it :-)
> I am not too fond of the 'flat' option because, as we add more BPF
> tools like function latency, we would keep extending the list of
> commands, each with a small scope, which is different from what we
> have right now.
I think we should focus on the tool end result, not on how it is
implemented, i.e. in this specific "function latency" tool ftrace is
used with BPF, but we could perhaps have used some other mechanism.
I think all these tools should have as much as possible a common set of
options, like the targets (cpu, cgroup, pid, tid, etc) so that one can
go from different views for those targets by just changing the name of
the tool.
We have things like:
$ perf sched
Usage: perf sched [<options>] {record|latency|map|replay|script|timehist}
-D, --dump-raw-trace dump raw trace in ASCII
-f, --force don't complain, do it
-i, --input <file> input file name
-v, --verbose be more verbose (show symbol address, etc)
With different 3rd level subcommands, but in the 'perf sched' case it is
the component being observed, not the mechanism being used to obtain/present
the observation data.
- Arnaldo
On Wed, Dec 1, 2021 at 3:59 AM Arnaldo Carvalho de Melo <[email protected]> wrote:
>
> Em Tue, Nov 30, 2021 at 04:36:49PM -0800, Stephane Eranian escreveu:
> > On Tue, Nov 30, 2021 at 2:58 PM Namhyung Kim <[email protected]> wrote:
> > > On Tue, Nov 30, 2021 at 6:37 AM Arnaldo Carvalho de Melo <[email protected]> wrote:
>
> > > > Em Mon, Nov 29, 2021 at 03:18:25PM -0800, Namhyung Kim escreveu:
> > > > > I've implemented a 'latency' subcommand in the perf ftrace command to
> > > > > show a histogram of function latency.
>
> > > > > To handle new subcommands, the existing functionality is moved to a
> > > > > 'trace' subcommand while preserving backward compatibility of not
> > > > > having a subcommand at all (defaults to 'trace').
>
> > > > > The latency subcommand accepts a target (kernel, for now) function
> > > > > with the -T option and shows a histogram like below:
>
> > > > Humm, wouldn't it be interesting to shorten this by having a new 'perf
> > > > flat' (function latency) tool, on the same level as 'perf ftrace', and
> > > > leave 'perf ftrace' to just be a convenient perf interface to what
> > > > ftrace provides?
>
> > > That would be fine. I also think 'perf ftrace latency' is
> > > a bit too long. But if we add a new feature
> > > like argdist (as in BCC) later, I thought it'd be nice
> > > to have them together as subcommands of perf ftrace.
>
> > > But it's up to you. I'll make a change if you prefer
> > > 'flat' (or how about 'fnlat' instead?).
>
> fnlat would be ok, flat was just funny to avoid suggesting it :-)
:)
>
> > I am not too fond of the 'flat' option because, as we add more BPF
> > tools like function latency, we would keep extending the list of
> > commands, each with a small scope, which is different from what we
> > have right now.
>
> I think we should focus on the tool end result, not on how it is
> implemented, i.e. in this specific "function latency" tool ftrace is
> used with BPF, but we could perhaps have used some other mechanism.
Agreed, but I think function latency belongs to function tracing
conceptually. So I added it as a subcommand in perf ftrace
not just because of the implementation.
>
> I think all these tools should have as much as possible a common set of
> options, like the targets (cpu, cgroup, pid, tid, etc) so that one can
> go from different views for those targets by just changing the name of
> the tool.
Currently, perf ftrace shares the target options with both subcommands.
Please see common_options in cmd_ftrace().
Thanks,
Namhyung
>
> We have things like:
>
> $ perf sched
>
> Usage: perf sched [<options>] {record|latency|map|replay|script|timehist}
>
> -D, --dump-raw-trace dump raw trace in ASCII
> -f, --force don't complain, do it
> -i, --input <file> input file name
> -v, --verbose be more verbose (show symbol address, etc)
>
> With different 3rd level subcommands, but in the 'perf sched' case it is
> the component being observed, not the mechanism being used to obtain/present
> the observation data.
>
> - Arnaldo
Em Wed, Dec 01, 2021 at 09:21:52AM -0800, Namhyung Kim escreveu:
> On Wed, Dec 1, 2021 at 3:59 AM Arnaldo Carvalho de Melo <[email protected]> wrote:
> > Em Tue, Nov 30, 2021 at 04:36:49PM -0800, Stephane Eranian escreveu:
> > > I am not too fond of the 'flat' option because, as we add more BPF
> > > tools like function latency, we would keep extending the list of
> > > commands, each with a small scope, which is different from what we
> > > have right now.
> > I think we should focus on the tool end result, not on how it is
> > implemented, i.e. in this specific "function latency" tool ftrace is
> > used with BPF, but we could perhaps have used some other mechanism.
> Agreed, but I think function latency belongs to function tracing
> conceptually. So I added it as a subcommand in perf ftrace
> not just because of the implementation.
Fair enough, I think we can go as-is then, whoever finds this overly
long can do:
alias flat="perf ftrace latency"
And go:
flat -p `pidof firefox`
etc.
> > I think all these tools should have as much as possible a common set of
> > options, like the targets (cpu, cgroup, pid, tid, etc) so that one can
> > go from different views for those targets by just changing the name of
> > the tool.
> Currently, perf ftrace shares the target options with both subcommands.
> Please see common_options in cmd_ftrace().
Sure, but I was alluding to all perf tools, not just 'perf ftrace'
subcommands, i.e. one can go from:
perf ftrace trace --pid `pidof firefox`
to:
perf trace --pid `pidof firefox`
to:
perf stat --pid `pidof firefox`
to:
perf top --pid `pidof firefox`
and get different views of that workload.
Have you thought about userspace function latencies? What tech would you
use for that, since ftrace doesn't cover those?
Would be nice that a single tool could be used to obtain userspace and
kernel space function latencies, just like 'perf probe' can be used for
kernel and userspace, choosing, behind the scenes, kprobes or uprobes.
- Arnaldo
On Fri, Dec 3, 2021 at 5:43 AM Arnaldo Carvalho de Melo <[email protected]> wrote:
> Em Wed, Dec 01, 2021 at 09:21:52AM -0800, Namhyung Kim escreveu:
> > On Wed, Dec 1, 2021 at 3:59 AM Arnaldo Carvalho de Melo <[email protected]> wrote:
> > > Em Tue, Nov 30, 2021 at 04:36:49PM -0800, Stephane Eranian escreveu:
> > > > I am not too fond of the 'flat' option because, as we add more BPF
> > > > tools like function latency, we would keep extending the list of
> > > > commands, each with a small scope, which is different from what we
> > > > have right now.
>
> > > I think we should focus on the tool end result, not on how it is
> > > implemented, i.e. in this specific "function latency" tool ftrace is
> > > used with BPF, but we could perhaps have used some other mechanism.
>
> > Agreed, but I think function latency belongs to function tracing
> > conceptually. So I added it as a subcommand in perf ftrace
> > not just because of the implementation.
>
> Fair enough, I think we can go as-is then, whoever finds this overly
> long can do:
>
> alias flat="perf ftrace latency"
>
> And go:
>
> flat -p `pidof firefox`
>
> etc.
Looks good! Thanks.
>
> > > I think all these tools should have as much as possible a common set of
> > > options, like the targets (cpu, cgroup, pid, tid, etc) so that one can
> > > go from different views for those targets by just changing the name of
> > > the tool.
>
> > Currently, perf ftrace shares the target options with both subcommands.
> > Please see common_options in cmd_ftrace().
>
> Sure, but I was alluding to all perf tools, not just 'perf ftrace'
> subcommands, i.e. one can go from:
>
> perf ftrace trace --pid `pidof firefox`
>
> to:
>
> perf trace --pid `pidof firefox`
>
> to:
>
> perf stat --pid `pidof firefox`
>
> to:
>
> perf top --pid `pidof firefox`
>
> and get different views of that workload.
Right, that's what I want too. Unfortunately, perf ftrace uses
the -t option for a different purpose. But it follows other
conventions like -a for system-wide, -p for pid already.
>
> Have you thought about userspace function latencies? What tech would you
> use for that, since ftrace doesn't cover those?
>
> Would be nice that a single tool could be used to obtain userspace and
> kernel space function latencies, just like 'perf probe' can be used for
> kernel and userspace, choosing, behind the scenes, kprobes or uprobes.
Yep, that's the next step. I think we can use uprobes
like 'perf probe' does.
Thanks,
Namhyung
> On 30-Nov-2021, at 4:48 AM, Namhyung Kim <[email protected]> wrote:
>
> The -b/--use-bpf option makes it use BPF to get the latency info of
> kernel functions. It should have a smaller performance impact, and I
> observed that the latency of the same function is smaller than before
> when using BPF.
>
> Signed-off-by: Namhyung Kim <[email protected]>
> ---
> tools/perf/Makefile.perf | 2 +-
> tools/perf/builtin-ftrace.c | 156 +++++++++++---------
> tools/perf/util/Build | 1 +
> tools/perf/util/bpf_ftrace.c | 113 ++++++++++++++
> tools/perf/util/bpf_skel/func_latency.bpf.c | 92 ++++++++++++
> tools/perf/util/ftrace.h | 81 ++++++++++
> 6 files changed, 378 insertions(+), 67 deletions(-)
> create mode 100644 tools/perf/util/bpf_ftrace.c
> create mode 100644 tools/perf/util/bpf_skel/func_latency.bpf.c
> create mode 100644 tools/perf/util/ftrace.h
>
> diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
> index 80522bcfafe0..a861b92ac6f9 100644
> --- a/tools/perf/Makefile.perf
> +++ b/tools/perf/Makefile.perf
> @@ -1041,7 +1041,7 @@ SKEL_OUT := $(abspath $(OUTPUT)util/bpf_skel)
> SKEL_TMP_OUT := $(abspath $(SKEL_OUT)/.tmp)
> SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h
> SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h
> -SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h
> +SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h
>
> $(SKEL_TMP_OUT) $(LIBBPF_OUTPUT):
> $(Q)$(MKDIR) -p $@
> diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
> index 8fd3c9c44c69..cde1e87032a1 100644
> --- a/tools/perf/builtin-ftrace.c
> +++ b/tools/perf/builtin-ftrace.c
> @@ -30,36 +30,12 @@
> #include "strfilter.h"
> #include "util/cap.h"
> #include "util/config.h"
> +#include "util/ftrace.h"
> #include "util/units.h"
> #include "util/parse-sublevel-options.h"
>
> #define DEFAULT_TRACER "function_graph"
>
> -struct perf_ftrace {
> - struct evlist *evlist;
> - struct target target;
> - const char *tracer;
> - struct list_head filters;
> - struct list_head notrace;
> - struct list_head graph_funcs;
> - struct list_head nograph_funcs;
> - int graph_depth;
> - unsigned long percpu_buffer_size;
> - bool inherit;
> - int func_stack_trace;
> - int func_irq_info;
> - int graph_nosleep_time;
> - int graph_noirqs;
> - int graph_verbose;
> - int graph_thresh;
> - unsigned int initial_delay;
> -};
> -
> -struct filter_entry {
> - struct list_head list;
> - char name[];
> -};
> -
> static volatile int workload_exec_errno;
> static bool done;
>
> @@ -704,8 +680,6 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace)
> return (done && !workload_exec_errno) ? 0 : -1;
> }
>
> -#define NUM_BUCKET 22 /* 20 + 2 (for outliers in both direction) */
> -
> static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf)
> {
> char *p, *q;
> @@ -816,69 +790,116 @@ static void display_histogram(int buckets[])
>
> }
>
> -static int __cmd_latency(struct perf_ftrace *ftrace)
> +static int prepare_func_latency(struct perf_ftrace *ftrace)
> {
> char *trace_file;
> - int trace_fd;
> - char buf[4096];
> - char line[256];
> - struct pollfd pollfd = {
> - .events = POLLIN,
> - };
> - int buckets[NUM_BUCKET] = { };
> + int fd;
>
> - if (!(perf_cap__capable(CAP_PERFMON) ||
> - perf_cap__capable(CAP_SYS_ADMIN))) {
> - pr_err("ftrace only works for %s!\n",
> -#ifdef HAVE_LIBCAP_SUPPORT
> - "users with the CAP_PERFMON or CAP_SYS_ADMIN capability"
> -#else
> - "root"
> -#endif
> - );
> - return -1;
> - }
> + if (ftrace->target.use_bpf)
> + return perf_ftrace__latency_prepare_bpf(ftrace);
>
> if (reset_tracing_files(ftrace) < 0) {
> pr_err("failed to reset ftrace\n");
> - goto out;
> + return -1;
> }
>
> /* reset ftrace buffer */
> if (write_tracing_file("trace", "0") < 0)
> - goto out;
> + return -1;
>
> if (set_tracing_options(ftrace) < 0)
> - goto out_reset;
> + return -1;
>
> /* force to use the function_graph tracer to track duration */
> if (write_tracing_file("current_tracer", "function_graph") < 0) {
> pr_err("failed to set current_tracer to function_graph\n");
> - goto out_reset;
> + return -1;
> }
>
> trace_file = get_tracing_file("trace_pipe");
> if (!trace_file) {
> pr_err("failed to open trace_pipe\n");
> - goto out_reset;
> + return -1;
> }
>
> - trace_fd = open(trace_file, O_RDONLY);
> + fd = open(trace_file, O_RDONLY);
> + if (fd < 0)
> + pr_err("failed to open trace_pipe\n");
>
> put_tracing_file(trace_file);
> + return fd;
> +}
>
> - if (trace_fd < 0) {
> - pr_err("failed to open trace_pipe\n");
> - goto out_reset;
> +static int start_func_latency(struct perf_ftrace *ftrace)
> +{
> + if (ftrace->target.use_bpf)
> + return perf_ftrace__latency_start_bpf(ftrace);
> +
> + if (write_tracing_file("tracing_on", "1") < 0) {
> + pr_err("can't enable tracing\n");
> + return -1;
> + }
> +
> + return 0;
> +}
> +
> +static int stop_func_latency(struct perf_ftrace *ftrace)
> +{
> + if (ftrace->target.use_bpf)
> + return perf_ftrace__latency_stop_bpf(ftrace);
> +
> + write_tracing_file("tracing_on", "0");
> + return 0;
> +}
> +
> +static int read_func_latency(struct perf_ftrace *ftrace, int buckets[])
> +{
> + if (ftrace->target.use_bpf)
> + return perf_ftrace__latency_read_bpf(ftrace, buckets);
> +
> + return 0;
> +}
> +
> +static int cleanup_func_latency(struct perf_ftrace *ftrace)
> +{
> + if (ftrace->target.use_bpf)
> + return perf_ftrace__latency_cleanup_bpf(ftrace);
> +
> + reset_tracing_files(ftrace);
> + return 0;
> +}
> +
> +static int __cmd_latency(struct perf_ftrace *ftrace)
> +{
> + int trace_fd;
> + char buf[4096];
> + char line[256];
> + struct pollfd pollfd = {
> + .events = POLLIN,
> + };
> + int buckets[NUM_BUCKET] = { };
> +
> + if (!(perf_cap__capable(CAP_PERFMON) ||
> + perf_cap__capable(CAP_SYS_ADMIN))) {
> + pr_err("ftrace only works for %s!\n",
> +#ifdef HAVE_LIBCAP_SUPPORT
> + "users with the CAP_PERFMON or CAP_SYS_ADMIN capability"
> +#else
> + "root"
> +#endif
> + );
> + return -1;
> }
>
> + trace_fd = prepare_func_latency(ftrace);
> + if (trace_fd < 0)
> + goto out;
> +
> fcntl(trace_fd, F_SETFL, O_NONBLOCK);
> pollfd.fd = trace_fd;
>
> - if (write_tracing_file("tracing_on", "1") < 0) {
> - pr_err("can't enable tracing\n");
> - goto out_close_fd;
> - }
> + if (start_func_latency(ftrace) < 0)
> + goto out;
>
> evlist__start_workload(ftrace->evlist);
>
> @@ -896,29 +917,30 @@ static int __cmd_latency(struct perf_ftrace *ftrace)
> }
> }
>
> - write_tracing_file("tracing_on", "0");
> + stop_func_latency(ftrace);
>
> if (workload_exec_errno) {
> const char *emsg = str_error_r(workload_exec_errno, buf, sizeof(buf));
> pr_err("workload failed: %s\n", emsg);
> - goto out_close_fd;
> + goto out;
> }
>
> /* read remaining buffer contents */
> - while (true) {
> + while (!ftrace->target.use_bpf) {
> int n = read(trace_fd, buf, sizeof(buf) - 1);
> if (n <= 0)
> break;
> make_histogram(buckets, buf, n, line);
> }
>
> + read_func_latency(ftrace, buckets);
> +
> display_histogram(buckets);
>
> -out_close_fd:
> - close(trace_fd);
> -out_reset:
> - reset_tracing_files(ftrace);
> out:
> + close(trace_fd);
> + cleanup_func_latency(ftrace);
> +
> return (done && !workload_exec_errno) ? 0 : -1;
> }
>
> @@ -1144,6 +1166,8 @@ int cmd_ftrace(int argc, const char **argv)
> const struct option latency_options[] = {
> OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
> "Show latency of given function", parse_filter_func),
> + OPT_BOOLEAN('b', "use-bpf", &ftrace.target.use_bpf,
> + "Use BPF to measure function latency"),
Hi Namhyung,
Can this be inside a BPF_SKEL check, similar to how we have “bpf-prog” and other options in builtin-stat.c?
#ifdef HAVE_BPF_SKEL
<<OPT_BOOLEAN for use-bpf>>
#endif
Otherwise, when using “-b” and perf is not built with BPF_SKEL, we will just return from perf_ftrace__latency_prepare_bpf without any error message.
Thanks
Athira
> OPT_PARENT(common_options),
> };
> const struct option *options = ftrace_options;
> diff --git a/tools/perf/util/Build b/tools/perf/util/Build
> index 2e5bfbb69960..294b12430d73 100644
> --- a/tools/perf/util/Build
> +++ b/tools/perf/util/Build
> @@ -144,6 +144,7 @@ perf-$(CONFIG_LIBBPF) += bpf-loader.o
> perf-$(CONFIG_LIBBPF) += bpf_map.o
> perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o
> perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o
> +perf-$(CONFIG_PERF_BPF_SKEL) += bpf_ftrace.o
> perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
> perf-$(CONFIG_LIBELF) += symbol-elf.o
> perf-$(CONFIG_LIBELF) += probe-file.o
> diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
> new file mode 100644
> index 000000000000..1975a6fe73c9
> --- /dev/null
> +++ b/tools/perf/util/bpf_ftrace.c
> @@ -0,0 +1,113 @@
> +#include <stdio.h>
> +#include <fcntl.h>
> +#include <stdint.h>
> +#include <stdlib.h>
> +
> +#include <linux/err.h>
> +
> +#include "util/ftrace.h"
> +#include "util/debug.h"
> +#include "util/bpf_counter.h"
> +
> +#include "util/bpf_skel/func_latency.skel.h"
> +
> +static struct func_latency_bpf *skel;
> +
> +int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
> +{
> + int fd, err;
> + struct filter_entry *func;
> + struct bpf_link *begin_link, *end_link;
> +
> + if (!list_is_singular(&ftrace->filters)) {
> + pr_err("ERROR: %s target function(s).\n",
> + list_empty(&ftrace->filters) ? "No" : "Too many");
> + return -1;
> + }
> +
> + func = list_first_entry(&ftrace->filters, struct filter_entry, list);
> +
> + skel = func_latency_bpf__open();
> + if (!skel) {
> + pr_err("Failed to open func latency skeleton\n");
> + return -1;
> + }
> +
> + set_max_rlimit();
> +
> + err = func_latency_bpf__load(skel);
> + if (err) {
> + pr_err("Failed to load func latency skeleton\n");
> + goto out;
> + }
> +
> + begin_link = bpf_program__attach_kprobe(skel->progs.func_begin,
> + false, func->name);
> + if (IS_ERR(begin_link)) {
> + pr_err("Failed to attach fentry program\n");
> + err = PTR_ERR(begin_link);
> + goto out;
> + }
> +
> + end_link = bpf_program__attach_kprobe(skel->progs.func_end,
> + true, func->name);
> + if (IS_ERR(end_link)) {
> + pr_err("Failed to attach fexit program\n");
> + err = PTR_ERR(end_link);
> + bpf_link__destroy(begin_link);
> + goto out;
> + }
> +
> + /* XXX: we don't actually use this fd - just for poll() */
> + return open("/dev/null", O_RDONLY);
> +
> +out:
> + return err;
> +}
> +
> +int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace __maybe_unused)
> +{
> + skel->bss->enabled = 1;
> + return 0;
> +}
> +
> +int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
> +{
> + skel->bss->enabled = 0;
> + return 0;
> +}
> +
> +int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
> + int buckets[])
> +{
> + int i, fd, err;
> + u32 idx;
> + u64 *hist;
> + int ncpus = cpu__max_cpu();
> +
> + fd = bpf_map__fd(skel->maps.latency);
> +
> + hist = calloc(ncpus, sizeof(*hist));
> + if (hist == NULL)
> + return -ENOMEM;
> +
> + for (idx = 0; idx < NUM_BUCKET; idx++) {
> + err = bpf_map_lookup_elem(fd, &idx, hist);
> + if (err) {
> + buckets[idx] = 0;
> + continue;
> + }
> +
> + for (i = 0; i < ncpus; i++)
> + buckets[idx] += hist[i];
> + }
> +
> + free(hist);
> + return 0;
> +}
> +
> +int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace __maybe_unused)
> +{
> + func_latency_bpf__destroy(skel);
> + return 0;
> +}
> diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c
> new file mode 100644
> index 000000000000..d7d31cfeabf8
> --- /dev/null
> +++ b/tools/perf/util/bpf_skel/func_latency.bpf.c
> @@ -0,0 +1,92 @@
> +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
> +// Copyright (c) 2021 Google
> +#include "vmlinux.h"
> +#include <bpf/bpf_helpers.h>
> +#include <bpf/bpf_tracing.h>
> +
> +#define NUM_BUCKET 22
> +
> +struct {
> + __uint(type, BPF_MAP_TYPE_HASH);
> + __uint(key_size, sizeof(__u64));
> + __uint(value_size, sizeof(__u64));
> + __uint(max_entries, 10000);
> +} functime SEC(".maps");
> +
> +struct {
> + __uint(type, BPF_MAP_TYPE_HASH);
> + __uint(key_size, sizeof(__u32));
> + __uint(value_size, sizeof(__u8));
> + __uint(max_entries, 1);
> +} cpu_filter SEC(".maps");
> +
> +struct {
> + __uint(type, BPF_MAP_TYPE_HASH);
> + __uint(key_size, sizeof(__u32));
> + __uint(value_size, sizeof(__u8));
> + __uint(max_entries, 1);
> +} task_filter SEC(".maps");
> +
> +struct {
> + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
> + __uint(key_size, sizeof(__u32));
> + __uint(value_size, sizeof(__u64));
> + __uint(max_entries, NUM_BUCKET);
> +} latency SEC(".maps");
> +
> +
> +int enabled = 0;
> +
> +SEC("kprobe/func")
> +int BPF_PROG(func_begin)
> +{
> + __u64 key, now;
> +
> + if (!enabled)
> + return 0;
> +
> + key = bpf_get_current_pid_tgid();
> + now = bpf_ktime_get_ns();
> +
> + // overwrite timestamp for nested functions
> + bpf_map_update_elem(&functime, &key, &now, BPF_ANY);
> + return 0;
> +}
> +
> +SEC("kretprobe/func")
> +int BPF_PROG(func_end)
> +{
> + __u64 tid;
> + __u64 *start;
> +
> + if (!enabled)
> + return 0;
> +
> + tid = bpf_get_current_pid_tgid();
> +
> + start = bpf_map_lookup_elem(&functime, &tid);
> + if (start) {
> + __s64 delta = bpf_ktime_get_ns() - *start;
> + __u32 key;
> + __u64 *hist;
> +
> + bpf_map_delete_elem(&functime, &tid);
> +
> + if (delta < 0)
> + return 0;
> +
> + // calculate index using delta in usec
> + for (key = 0; key < (NUM_BUCKET - 1); key++) {
> + if (delta < ((1000UL) << key))
> + break;
> + }
> +
> + hist = bpf_map_lookup_elem(&latency, &key);
> + if (!hist)
> + return 0;
> +
> + *hist += 1;
> + }
> +
> + return 0;
> +}
> diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h
> new file mode 100644
> index 000000000000..887f68a185f7
> --- /dev/null
> +++ b/tools/perf/util/ftrace.h
> @@ -0,0 +1,81 @@
> +#ifndef __PERF_FTRACE_H__
> +#define __PERF_FTRACE_H__
> +
> +#include <linux/list.h>
> +
> +#include "target.h"
> +
> +struct evlist;
> +
> +struct perf_ftrace {
> + struct evlist *evlist;
> + struct target target;
> + const char *tracer;
> + struct list_head filters;
> + struct list_head notrace;
> + struct list_head graph_funcs;
> + struct list_head nograph_funcs;
> + unsigned long percpu_buffer_size;
> + bool inherit;
> + int graph_depth;
> + int func_stack_trace;
> + int func_irq_info;
> + int graph_nosleep_time;
> + int graph_noirqs;
> + int graph_verbose;
> + int graph_thresh;
> + unsigned int initial_delay;
> +};
> +
> +struct filter_entry {
> + struct list_head list;
> + char name[];
> +};
> +
> +#define NUM_BUCKET 22 /* 20 + 2 (for outliers in both direction) */
> +
> +#ifdef HAVE_BPF_SKEL
> +
> +int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace);
> +int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace);
> +int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace);
> +int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace,
> + int buckets[]);
> +int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace);
> +
> +#else /* !HAVE_BPF_SKEL */
> +
> +static inline int
> +perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace __maybe_unused)
> +{
> + return -1;
> +}
> +
> +static inline int
> +perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace __maybe_unused)
> +{
> + return -1;
> +}
> +
> +static inline int
> +perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
> +{
> + return -1;
> +}
> +
> +static inline int
> +perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
> + int buckets[] __maybe_unused)
> +{
> + return -1;
> +}
> +
> +static inline int
> +perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace __maybe_unused)
> +{
> + return -1;
> +}
> +
> +#endif /* HAVE_BPF_SKEL */
> +
> +#endif /* __PERF_FTRACE_H__ */
> --
> 2.34.0.rc2.393.gf8c9666880-goog
>
>
Hello Athira,
On Sun, Dec 5, 2021 at 6:24 PM Athira Rajeev
<[email protected]> wrote:
>
>
>
> > On 30-Nov-2021, at 4:48 AM, Namhyung Kim <[email protected]> wrote:
> >
> > The -b/--use-bpf option is to use BPF to get latency info of kernel
> > functions. It should have less performance impact, and I observed that
> > the latency of the same function is lower than before when using BPF.
> >
> > Signed-off-by: Namhyung Kim <[email protected]>
> > ---
[SNIP]
> > @@ -1144,6 +1166,8 @@ int cmd_ftrace(int argc, const char **argv)
> > const struct option latency_options[] = {
> > OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
> > "Show latency of given function", parse_filter_func),
> > + OPT_BOOLEAN('b', "use-bpf", &ftrace.target.use_bpf,
> > + "Use BPF to measure function latency"),
>
>
> Hi Namhyung,
>
> Can this be inside a BPF_SKEL check, similar to how we have “bpf-prog” and other options in builtin-stat.c?
>
> #ifdef HAVE_BPF_SKEL
> <<OPT_BOOLEAN for use-bpf>>
> #endif
>
> Otherwise, when using “-b” and perf is not built with BPF_SKEL, we will just return from perf_ftrace__latency_prepare_bpf without any error message.
Thanks for reporting this. Yeah, it should report
an error message in that case.
I think it'd be better to add an error message
rather than hiding the option.
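Something like this in the !HAVE_BPF_SKEL stub in util/ftrace.h, for
example (just a rough sketch, untested):

  static inline int
  perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace __maybe_unused)
  {
          /* sketch: tell the user why -b cannot work in this build */
          pr_err("error: -b/--use-bpf requires perf to be built with BPF skeleton support\n");
          return -1;
  }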
Thanks,
Namhyung
> On Nov 29, 2021, at 3:18 PM, Namhyung Kim <[email protected]> wrote:
>
> The -b/--use-bpf option is to use BPF to get latency info of kernel
> functions. It should have less performance impact, and I observed that
> the latency of the same function is lower than before when using BPF.
>
> Signed-off-by: Namhyung Kim <[email protected]>
> ---
We can actually get something similar with a bpftrace one-liner, like:
bpftrace -e 'kprobe:mutex_lock { @start[tid] = nsecs; } kretprobe:mutex_lock /@start[tid] != 0/ { @delay = hist(nsecs - @start[tid]); delete(@start[tid]); } END {clear(@start); }'
Attaching 3 probes...
^C
@delay:
[256, 512) 1553006 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[512, 1K) 89171 |@@ |
[1K, 2K) 37522 |@ |
[2K, 4K) 3308 | |
[4K, 8K) 415 | |
[8K, 16K) 38 | |
[16K, 32K) 47 | |
[32K, 64K) 2 | |
[64K, 128K) 0 | |
[128K, 256K) 0 | |
[256K, 512K) 0 | |
[512K, 1M) 0 | |
[1M, 2M) 0 | |
[2M, 4M) 0 | |
[4M, 8M) 1 | |
So I am not quite sure whether we need this for systems with BPF features.
Other than this, a few comments and nitpicks below.
> diff --git a/tools/perf/util/Build b/tools/perf/util/Build
> index 2e5bfbb69960..294b12430d73 100644
> --- a/tools/perf/util/Build
> +++ b/tools/perf/util/Build
> @@ -144,6 +144,7 @@ perf-$(CONFIG_LIBBPF) += bpf-loader.o
> perf-$(CONFIG_LIBBPF) += bpf_map.o
> perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o
> perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o
> +perf-$(CONFIG_PERF_BPF_SKEL) += bpf_ftrace.o
> perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
> perf-$(CONFIG_LIBELF) += symbol-elf.o
> perf-$(CONFIG_LIBELF) += probe-file.o
> diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
> new file mode 100644
> index 000000000000..1975a6fe73c9
> --- /dev/null
> +++ b/tools/perf/util/bpf_ftrace.c
> @@ -0,0 +1,113 @@
> +#include <stdio.h>
> +#include <fcntl.h>
> +#include <stdint.h>
> +#include <stdlib.h>
> +
> +#include <linux/err.h>
> +
> +#include "util/ftrace.h"
> +#include "util/debug.h"
> +#include "util/bpf_counter.h"
> +
> +#include "util/bpf_skel/func_latency.skel.h"
> +
> +static struct func_latency_bpf *skel;
> +
> +int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
> +{
> + int fd, err;
> + struct filter_entry *func;
> + struct bpf_link *begin_link, *end_link;
> +
> + if (!list_is_singular(&ftrace->filters)) {
> + pr_err("ERROR: %s target function(s).\n",
> + list_empty(&ftrace->filters) ? "No" : "Too many");
> + return -1;
> + }
> +
> + func = list_first_entry(&ftrace->filters, struct filter_entry, list);
> +
> + skel = func_latency_bpf__open();
> + if (!skel) {
> + pr_err("Failed to open func latency skeleton\n");
> + return -1;
> + }
> +
> + set_max_rlimit();
> +
> + err = func_latency_bpf__load(skel);
We can do func_latency_bpf__open_and_load() to save a few lines.
> + if (err) {
> + pr_err("Failed to load func latency skeleton\n");
> + goto out;
> + }
> +
> + begin_link = bpf_program__attach_kprobe(skel->progs.func_begin,
> + false, func->name);
> + if (IS_ERR(begin_link)) {
> + pr_err("Failed to attach fentry program\n");
> + err = PTR_ERR(begin_link);
> + goto out;
> + }
> +
> + end_link = bpf_program__attach_kprobe(skel->progs.func_end,
> + true, func->name);
> + if (IS_ERR(end_link)) {
> + pr_err("Failed to attach fexit program\n");
> + err = PTR_ERR(end_link);
> + bpf_link__destroy(begin_link);
> + goto out;
> + }
I think we are leaking begin_link and end_link here? (They will be released
on perf termination, but we are not freeing them in the code).
[...]
> diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c
> new file mode 100644
> index 000000000000..d7d31cfeabf8
> --- /dev/null
> +++ b/tools/perf/util/bpf_skel/func_latency.bpf.c
> @@ -0,0 +1,92 @@
> +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
> +// Copyright (c) 2021 Google
> +#include "vmlinux.h"
> +#include <bpf/bpf_helpers.h>
> +#include <bpf/bpf_tracing.h>
> +
> +#define NUM_BUCKET 22
We define NUM_BUCKET twice, which might cause issues when we change it.
Maybe just use bpf_map__set_max_entries() in user space?
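e.g. something like this between open and load (untested sketch, with
NUM_BUCKET then coming only from util/ftrace.h):

  skel = func_latency_bpf__open();
  ...
  /* sketch: size the map from the single NUM_BUCKET definition */
  bpf_map__set_max_entries(skel->maps.latency, NUM_BUCKET);

  err = func_latency_bpf__load(skel);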
[...]
Hi Song,
On Mon, Dec 6, 2021 at 5:06 PM Song Liu <[email protected]> wrote:
>
>
>
> > On Nov 29, 2021, at 3:18 PM, Namhyung Kim <[email protected]> wrote:
> >
> > The -b/--use-bpf option is to use BPF to get latency info of kernel
> > functions. It should have less performance impact, and I observed that
> > the latency of the same function is lower than before when using BPF.
> >
> > Signed-off-by: Namhyung Kim <[email protected]>
> > ---
>
> We can actually get something similar with a bpftrace one-liner, like:
>
> bpftrace -e 'kprobe:mutex_lock { @start[tid] = nsecs; } kretprobe:mutex_lock /@start[tid] != 0/ { @delay = hist(nsecs - @start[tid]); delete(@start[tid]); } END {clear(@start); }'
> [SNIP]
>
> So I am not quite sure whether we need this for systems with BPF features.
Yeah, bpftrace can do this too, but there are situations where one
cannot use that tool for some reason. On the other hand, we have been
using the perf tools widely to collect performance profiles on the fleet.
So it'd be really nice if we could use perf as a vehicle to carry various
innovative features using BPF. I plan to bring more of the functionality
in BCC/bpftrace to the perf tools in this regard.
>
> Other than this, a few comments and nitpicks below.
>
> [SNIP]
> > + skel = func_latency_bpf__open();
> > + if (!skel) {
> > + pr_err("Failed to open func latency skeleton\n");
> > + return -1;
> > + }
> > +
> > + set_max_rlimit();
> > +
> > + err = func_latency_bpf__load(skel);
>
> We can do func_latency_bpf__open_and_load() to save a few lines.
Sure, but I was thinking of adding some modifications between
them, like in patch 5/5.
>
> > + if (err) {
> > + pr_err("Failed to load func latency skeleton\n");
> > + goto out;
> > + }
> > +
> > + begin_link = bpf_program__attach_kprobe(skel->progs.func_begin,
> > + false, func->name);
> > + if (IS_ERR(begin_link)) {
> > + pr_err("Failed to attach fentry program\n");
> > + err = PTR_ERR(begin_link);
> > + goto out;
> > + }
> > +
> > + end_link = bpf_program__attach_kprobe(skel->progs.func_end,
> > + true, func->name);
> > + if (IS_ERR(end_link)) {
> > + pr_err("Failed to attach fexit program\n");
> > + err = PTR_ERR(end_link);
> > + bpf_link__destroy(begin_link);
> > + goto out;
> > + }
>
> I think we are leaking begin_link and end_link here? (They will be released
> on perf termination, but we are not freeing them in the code).
Right, I'll keep them and destroy at the end.
>
> [...]
>
> > diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c
> > new file mode 100644
> > index 000000000000..d7d31cfeabf8
> > --- /dev/null
> > +++ b/tools/perf/util/bpf_skel/func_latency.bpf.c
> > @@ -0,0 +1,92 @@
> > +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
> > +// Copyright (c) 2021 Google
> > +#include "vmlinux.h"
> > +#include <bpf/bpf_helpers.h>
> > +#include <bpf/bpf_tracing.h>
> > +
> > +#define NUM_BUCKET 22
>
> We define NUM_BUCKET twice, which might cause issues when we change it.
> Maybe just use bpf_map__set_max_entries() in user space?
Sure, will do.
Thanks,
Namhyung
On Tue, Dec 7, 2021 at 10:00 AM Namhyung Kim <[email protected]> wrote:
>
> Hi Song,
>
> On Mon, Dec 6, 2021 at 5:06 PM Song Liu <[email protected]> wrote:
> >
> [SNIP]
> >
> > I think we are leaking begin_link and end_link here? (They will be released
> > on perf termination, but we are not freeing them in the code).
>
> Right, I'll keep them and destroy at the end.
Oh, it seems I can just use skel->links.func_begin (and _end).
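i.e. something along these lines (sketch):

  skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
                                                      false, func->name);
  if (IS_ERR(skel->links.func_begin)) {
          pr_err("Failed to attach fentry program\n");
          err = PTR_ERR(skel->links.func_begin);
          goto out;
  }

Then func_latency_bpf__destroy(skel) in the cleanup path should detach
and free the links as well, with no explicit bpf_link__destroy() calls.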
>
> [SNIP]
> > > +#define NUM_BUCKET 22
> >
> > We define NUM_BUCKET twice, which might cause issues when we change it.
> > Maybe just use bpf_map__set_max_entries() in user space?
It's also used for the loop count, so I need to keep it.
Maybe I can add a comment noting that it should be kept in sync
with the userspace.
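Something like this in func_latency.bpf.c, maybe (sketch):

  // NUM_BUCKET is also the loop bound in func_end();
  // keep it in sync with NUM_BUCKET in tools/perf/util/ftrace.h.
  #define NUM_BUCKET 22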
Thanks,
Namhyung
On Mon, Nov 29, 2021 at 03:18:25PM -0800, Namhyung Kim wrote:
> Hello,
>
> I've implemented 'latency' subcommand in the perf ftrace command to
> show a histogram of function latency.
This still applies cleanly, I'll test it later.
- Arnaldo
> [SNIP]
--
- Arnaldo
On Mon, Dec 13, 2021 at 10:24 AM Arnaldo Carvalho de Melo
<[email protected]> wrote:
>
> On Mon, Nov 29, 2021 at 03:18:25PM -0800, Namhyung Kim wrote:
> > Hello,
> >
> > I've implemented 'latency' subcommand in the perf ftrace command to
> > show a histogram of function latency.
>
> This still applies cleanly, I'll test it later.
Thank you Arnaldo! I have some small modifications,
but the functionality should be the same. Please let me know
if you have any suggestions.
Thanks,
Namhyung
On Mon, Dec 13, 2021 at 11:40:16AM -0800, Namhyung Kim wrote:
> On Mon, Dec 13, 2021 at 10:24 AM Arnaldo Carvalho de Melo
> <[email protected]> wrote:
> >
> > On Mon, Nov 29, 2021 at 03:18:25PM -0800, Namhyung Kim wrote:
> > > Hello,
> > >
> > > I've implemented 'latency' subcommand in the perf ftrace command to
> > > show a histogram of function latency.
> >
> > This still applies cleanly, I'll test it later.
>
> Thank you Arnaldo! I have some small modifications,
> but the functionality should be the same. Please let me know
> if you have any suggestions.
So, it is failing here with:
⬢[acme@toolbox perf]$ m
make: Entering directory '/var/home/acme/git/perf/tools/perf'
BUILD: Doing 'make -j32' parallel build
CC /tmp/build/perf/perf-read-vdso32
LINK /tmp/build/perf/libperf-jvmti.so
make[3]: Nothing to be done for 'install_headers'.
CLANG /tmp/build/perf/util/bpf_skel/.tmp/func_latency.bpf.o
DESCEND plugins
PERF_VERSION = 5.16.rc5.g6924c0713d69
GEN perf-archive
GEN perf-with-kcore
GEN perf-iostat
GEN /tmp/build/perf/python/perf.so
INSTALL trace_plugins
GENSKEL /tmp/build/perf/util/bpf_skel/func_latency.skel.h
CC /tmp/build/perf/builtin-ftrace.o
CC /tmp/build/perf/util/header.o
CC /tmp/build/perf/util/bpf_ftrace.o
util/bpf_ftrace.c: In function ‘perf_ftrace__latency_prepare_bpf’:
util/bpf_ftrace.c:18:13: error: unused variable ‘fd’ [-Werror=unused-variable]
18 | int fd, err;
| ^~
util/bpf_ftrace.c: In function ‘perf_ftrace__latency_read_bpf’:
util/bpf_ftrace.c:86:21: error: implicit declaration of function ‘cpu__max_cpu’ [-Werror=implicit-function-declaration]
86 | int ncpus = cpu__max_cpu();
| ^~~~~~~~~~~~
cc1: all warnings being treated as errors
make[4]: *** [/var/home/acme/git/perf/tools/build/Makefile.build:96: /tmp/build/perf/util/bpf_ftrace.o] Error 1
make[4]: *** Waiting for unfinished jobs....
make[3]: *** [/var/home/acme/git/perf/tools/build/Makefile.build:139: util] Error 2
make[2]: *** [Makefile.perf:665: /tmp/build/perf/perf-in.o] Error 2
make[1]: *** [Makefile.perf:240: sub-make] Error 2
make: *** [Makefile:113: install-bin] Error 2
make: Leaving directory '/var/home/acme/git/perf/tools/perf'
Performance counter stats for 'make -k BUILD_BPF_SKEL=1 CORESIGHT=1 PYTHON=python3 O=/tmp/build/perf -C tools/perf install-bin':
8,018,756,380 cycles:u
14,452,278,281 instructions:u # 1.80 insn per cycle
2.250520538 seconds time elapsed
1.834706000 seconds user
0.876410000 seconds sys
⬢[acme@toolbox perf]$
⬢[acme@toolbox perf]$ git log --oneline -5
6924c0713d691a6f (HEAD) perf ftrace: Add -b/--use-bpf option for latency subcommand
c96f789bf6313259 perf ftrace: Add 'latency' subcommand
0c5041e1141b53d3 perf ftrace: Move out common code from __cmd_ftrace
8b4a91a9a0b4fbf6 perf ftrace: Add 'trace' subcommand
6bf2efd9d99f3a14 perf arch: Support register names from all archs
⬢[acme@toolbox perf]$
Fixed with:
diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
index 1975a6fe73c9fa8b..f5b49fc056ab8b95 100644
--- a/tools/perf/util/bpf_ftrace.c
+++ b/tools/perf/util/bpf_ftrace.c
@@ -5,6 +5,7 @@
#include <linux/err.h>
+#include "util/cpumap.h"
#include "util/ftrace.h"
#include "util/debug.h"
#include "util/bpf_counter.h"
@@ -15,7 +16,7 @@ static struct func_latency_bpf *skel;
int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
{
- int fd, err;
+ int err;
struct filter_entry *func;
struct bpf_link *begin_link, *end_link;
On Wed, Dec 15, 2021 at 12:30:10PM -0300, Arnaldo Carvalho de Melo wrote:
> On Mon, Dec 13, 2021 at 11:40:16AM -0800, Namhyung Kim wrote:
> > On Mon, Dec 13, 2021 at 10:24 AM Arnaldo Carvalho de Melo
> > <[email protected]> wrote:
> > >
> > > On Mon, Nov 29, 2021 at 03:18:25PM -0800, Namhyung Kim wrote:
> > > > Hello,
> > > >
> > > > I've implemented 'latency' subcommand in the perf ftrace command to
> > > > show a histogram of function latency.
> > >
> > > This still applies cleanly, I'll test it later.
> >
> > Thank you Arnaldo! I have some small modifications,
> > but the functionality should be the same. Please let me know
> > if you have any suggestions.
>
> So, it is failing here with:
So the 'fd' variable and the cpumap.h problems go away when the last
patch is applied, but for bisection it's better to apply the patch below
to 4/5.
I tested it all, cool stuff, will you resubmit soon?
I pushed it to the tmp.perf/ftrace_bpf branch on my repo at:
git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git
https://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git/log/?h=tmp.perf/ftrace_bpf
With committer testing notes.
- Arnaldo
> Fixed with:
>
>
> diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
> index 1975a6fe73c9fa8b..f5b49fc056ab8b95 100644
> --- a/tools/perf/util/bpf_ftrace.c
> +++ b/tools/perf/util/bpf_ftrace.c
> @@ -5,6 +5,7 @@
>
> #include <linux/err.h>
>
> +#include "util/cpumap.h"
> #include "util/ftrace.h"
> #include "util/debug.h"
> #include "util/bpf_counter.h"
> @@ -15,7 +16,7 @@ static struct func_latency_bpf *skel;
>
> int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
> {
> - int fd, err;
> + int err;
> struct filter_entry *func;
> struct bpf_link *begin_link, *end_link;
>
--
- Arnaldo
Hi Arnaldo,
On Wed, Dec 15, 2021 at 8:08 AM Arnaldo Carvalho de Melo
<[email protected]> wrote:
>
> On Wed, Dec 15, 2021 at 12:30:10PM -0300, Arnaldo Carvalho de Melo wrote:
> > On Mon, Dec 13, 2021 at 11:40:16AM -0800, Namhyung Kim wrote:
> > > On Mon, Dec 13, 2021 at 10:24 AM Arnaldo Carvalho de Melo
> > > <[email protected]> wrote:
> > > >
> > > > On Mon, Nov 29, 2021 at 03:18:25PM -0800, Namhyung Kim wrote:
> > > > > Hello,
> > > > >
> > > > > I've implemented 'latency' subcommand in the perf ftrace command to
> > > > > show a histogram of function latency.
> > > >
> > > > This still applies cleanly, I'll test it later.
> > >
> > > Thank you Arnaldo! I have some small modifications,
> > > but the functionality should be the same. Please let me know
> > > if you have any suggestions.
> >
> > So, it is failing here with:
>
> So the 'fd' variable and the cpumap.h problems go away when the last
> patch is applied, but for bisection it's better to apply the patch below
> to 4/5.
Sure, I will move them. Thanks for pointing that out.
>
> I tested it all, cool stuff, will you resubmit soon?
Yep!
>
> I pushed it to the tmp.perf/ftrace_bpf branch on my repo at:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git
>
> https://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git/log/?h=tmp.perf/ftrace_bpf
>
> With committer testing notes.
Thanks for the test!
Namhyung
> [SNIP]
On Mon, Dec 6, 2021 at 9:16 AM Namhyung Kim <[email protected]> wrote:
>
> Hello Athira,
>
> On Sun, Dec 5, 2021 at 6:24 PM Athira Rajeev
> <[email protected]> wrote:
> >
> >
> >
> > > On 30-Nov-2021, at 4:48 AM, Namhyung Kim <[email protected]> wrote:
> > > [SNIP]
> > > @@ -1144,6 +1166,8 @@ int cmd_ftrace(int argc, const char **argv)
> > > const struct option latency_options[] = {
> > > OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
> > > "Show latency of given function", parse_filter_func),
> > > + OPT_BOOLEAN('b', "use-bpf", &ftrace.target.use_bpf,
> > > + "Use BPF to measure function latency"),
> >
> >
> > Hi Namhyung,
> >
> > Can this be inside a BPF_SKEL check, similar to how we have “bpf-prog” and other options in builtin-stat.c?
> >
> > #ifdef HAVE_BPF_SKEL
> > <<OPT_BOOLEAN for use-bpf>>
> > #endif
> >
> > Otherwise, when using “-b” and perf is not built with BPF_SKEL, we will just return from perf_ftrace__latency_prepare_bpf without any error message.
>
> Thanks for reporting this. Yeah, it should report
> an error message in that case.
>
> I think it'd be better to add an error message
> rather than hiding the option.
Well, now we build perf with BPF by default,
so I think it's OK to follow your suggestion.
It'd show the usage and options when the -b option is used
and BPF support is not enabled.
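i.e. roughly (sketch):

  const struct option latency_options[] = {
          OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
                       "Show latency of given function", parse_filter_func),
  #ifdef HAVE_BPF_SKEL
          OPT_BOOLEAN('b', "use-bpf", &ftrace.target.use_bpf,
                      "Use BPF to measure function latency"),
  #endif
          OPT_PARENT(common_options),
  };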
Thanks,
Namhyung
On Wed, Dec 15, 2021 at 10:15 AM Namhyung Kim <[email protected]> wrote:
> Well, now we build perf with BPF by default,
> so I think it's OK to follow your suggestion.
> It'd show the usage and options when the -b option is used
> and BPF support is not enabled.
Oh, it seems we're not there yet. :)
But anyway, I think we can get there soon, so I'll make the change.
Thanks,
Namhyung