Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1758566Ab3G3JWS (ORCPT ); Tue, 30 Jul 2013 05:22:18 -0400 Received: from LGEMRELSE6Q.lge.com ([156.147.1.121]:57152 "EHLO LGEMRELSE6Q.lge.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1758440Ab3G3JTY (ORCPT ); Tue, 30 Jul 2013 05:19:24 -0400 X-AuditID: 9c930179-b7c49ae000000e68-e5-51f785176e0d From: Namhyung Kim To: Arnaldo Carvalho de Melo Cc: Peter Zijlstra , Paul Mackerras , Ingo Molnar , Namhyung Kim , LKML , Steven Rostedt , Frederic Weisbecker , Jiri Olsa , David Ahern , Stephane Eranian , Jeremy Eder Subject: [PATCH 07/17] perf ftrace: Add 'record' sub-command Date: Tue, 30 Jul 2013 18:19:04 +0900 Message-Id: <1375175954-798-8-git-send-email-namhyung@kernel.org> X-Mailer: git-send-email 1.7.11.7 In-Reply-To: <1375175954-798-1-git-send-email-namhyung@kernel.org> References: <1375175954-798-1-git-send-email-namhyung@kernel.org> X-Brightmail-Tracker: AAAAAA== Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 19268 Lines: 764 From: Namhyung Kim The ftrace record command is for saving raw ftrace buffer contents which can be get from per_cpu/cpuX/trace_pipe_raw. Since ftrace events are generated very frequently so single thread for recording mostly resulted in buffer overruns. Thus it uses per-cpu recorder thread to prevent such cases and they save the contents to their own files. These per-cpu data files are saved in a directory so that they can be easily found when needed. I chose the default directory name as "perf.data.dir" and the first two (i.e. "perf.data") can be changed with -o option. The structure of the directory looks like: $ tree perf.data.dir perf.data.dir/ |-- perf.header |-- trace-cpu0.buf |-- trace-cpu1.buf |-- trace-cpu2.buf `-- trace-cpu3.buf In addition to trace-cpuX.buf files, it has perf.header file also. The perf.header file is compatible with existing perf.data format and contains usual event information, feature mask and sample data. The sample data is synthesized to indicate given cpu has a record file. Cc: Steven Rostedt Cc: Frederic Weisbecker Signed-off-by: Namhyung Kim --- tools/perf/builtin-ftrace.c | 640 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 623 insertions(+), 17 deletions(-) diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 1bb6d1ff0eb1..46b5c136e4bb 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "util/debug.h" #include "util/parse-options.h" @@ -19,14 +20,17 @@ #include "util/target.h" #include "util/thread_map.h" #include "util/cpumap.h" +#include "util/trace-event.h" #define DEFAULT_TRACER "function_graph" +#define DEFAULT_DIRNAME "perf.data" struct perf_ftrace { struct perf_evlist *evlist; struct perf_target target; const char *tracer; + const char *dirname; }; static bool done; @@ -171,40 +175,56 @@ static int reset_tracing_cpu(void) return 0; } -static int do_ftrace_live(struct perf_ftrace *ftrace) +static int setup_tracing_files(struct perf_ftrace *ftrace) { - char *trace_file; - int trace_fd; - char buf[4096]; - /* sleep 1ms if no data read */ - struct timespec req = { .tv_nsec = 1000000 }; - - signal(SIGINT, sig_handler); - signal(SIGUSR1, sig_handler); - signal(SIGCHLD, sig_handler); + int ret = -1; - if (reset_tracing_files(ftrace) < 0) + if (reset_tracing_files(ftrace) < 0) { + pr_err("failed to reset tracing files\n"); goto out; + } /* reset ftrace buffer */ - if (write_tracing_file("trace", "0") < 0) + if (write_tracing_file("trace", "0") < 0) { + pr_err("failed to reset ftrace buffer\n"); goto out; + } if (set_tracing_pid(ftrace) < 0) { pr_err("failed to set ftrace pid\n"); - goto out_reset; + goto out; } if (set_tracing_cpu(ftrace) < 0) { pr_err("failed to set tracing cpumask\n"); - goto out_reset; + goto out; } if (write_tracing_file("current_tracer", ftrace->tracer) < 0) { pr_err("failed to set current_tracer to %s\n", ftrace->tracer); - goto out_reset; + goto out; } + ret = 0; +out: + return ret; +} + +static int do_ftrace_live(struct perf_ftrace *ftrace) +{ + char *trace_file; + int trace_fd; + char buf[4096]; + /* sleep 1ms if no data read */ + struct timespec req = { .tv_nsec = 1000000 }; + + signal(SIGINT, sig_handler); + signal(SIGUSR1, sig_handler); + signal(SIGCHLD, sig_handler); + + if (setup_tracing_files(ftrace) < 0) + goto out_reset; + trace_file = get_tracing_file("trace_pipe"); if (!trace_file) { pr_err("failed to open trace_pipe\n"); @@ -259,7 +279,523 @@ out_close_fd: close(trace_fd); out_reset: reset_tracing_files(ftrace); + return done ? 0 : -1; +} + +static int alloc_ftrace_evsel(struct perf_ftrace *ftrace) +{ + if (!strcmp(ftrace->tracer, "function")) { + if (perf_evlist__add_newtp(ftrace->evlist, "ftrace", + "function", NULL) < 0) { + pr_err("failed to allocate ftrace event\n"); + return -1; + } + } else if (!strcmp(ftrace->tracer, "function_graph")) { + if (perf_evlist__add_newtp(ftrace->evlist, "ftrace", + "funcgraph_entry", NULL) || + perf_evlist__add_newtp(ftrace->evlist, "ftrace", + "funcgraph_exit", NULL)) { + pr_err("failed to allocate ftrace event\n"); + return -1; + } + } else { + pr_err("Not supported tracer: %s\n", ftrace->tracer); + return -1; + } + return 0; +} + +static void canonicalize_directory_name(const char *dirname) +{ + char *suffix = strstr(dirname, ".dir"); + + if (suffix) { + if (suffix[4] == '\0' || suffix[4] == '/') + *suffix = '\0'; + } +} + +static int remove_directory(const char *pathname) +{ + DIR *dir; + int ret = 0; + struct dirent *dent; + char namebuf[PATH_MAX]; + + dir = opendir(pathname); + if (dir == NULL) + return 0; + + while ((dent = readdir(dir)) != NULL && !ret) { + struct stat statbuf; + + if (dent->d_name[0] == '.') + continue; + + scnprintf(namebuf, sizeof(namebuf), "%s/%s", + pathname, dent->d_name); + + ret = stat(namebuf, &statbuf); + if (ret < 0) { + pr_debug("stat failed\n"); + break; + } + + if (S_ISREG(statbuf.st_mode)) + ret = unlink(namebuf); + else if (S_ISDIR(statbuf.st_mode)) + ret = remove_directory(namebuf); + else { + pr_debug("unknown file.\n"); + ret = -1; + } + } + closedir(dir); + + if (ret < 0) + return ret; + + return rmdir(pathname); +} + +static int create_perf_header(struct perf_ftrace *ftrace) +{ + int err; + char buf[PATH_MAX]; + struct stat statbuf; + + canonicalize_directory_name(ftrace->dirname); + + scnprintf(buf, sizeof(buf), "%s.dir", ftrace->dirname); + + if (!stat(buf, &statbuf) && S_ISDIR(statbuf.st_mode)) { + /* same name already exists - rename to *.old.dir */ + char *old_name = malloc(strlen(buf) + 5); + if (old_name == NULL) + return -1; + + scnprintf(old_name, strlen(buf) + 5, + "%s.old.dir", ftrace->dirname); + + if (remove_directory(old_name) < 0) { + perror("rmdir"); + return -1; + } + + if (rename(buf, old_name) < 0) { + perror("rename"); + free(old_name); + return -1; + } + + free(old_name); + } + + err = mkdir(buf, 0755); + if (err < 0) { + perror("mkdir"); + return -1; + } + + strcat(buf, "/perf.header"); + + err = open(buf, O_RDWR | O_CREAT | O_TRUNC, 0644); + return err; +} + +static void sig_dummy_handler(int sig __maybe_unused) +{ + while (!done) + continue; +} + +enum { + RECORD_STATE__ERROR = -1, + RECORD_STATE__INIT, + RECORD_STATE__READY, + RECORD_STATE__DONE, +}; + +struct ftrace_record_arg { + struct perf_ftrace *ftrace; + int cpu; + int state; + pthread_t id; + struct list_head node; +}; + +static int recorder_count; +pthread_cond_t recorder_ready_cond = PTHREAD_COND_INITIALIZER; +pthread_cond_t recorder_start_cond = PTHREAD_COND_INITIALIZER; +pthread_mutex_t recorder_mutex = PTHREAD_MUTEX_INITIALIZER; + +static void *record_ftrace_raw_buffer(void *arg) +{ + struct ftrace_record_arg *fra = arg; + char buf[4096]; + char *trace_file; + int trace_fd; + int output_fd; + off_t byte_written = 0; + sigset_t sigmask; + /* sleep 1ms if no data read */ + struct timespec req = { .tv_nsec = 1000000 }; + + fra->state = RECORD_STATE__ERROR; + + snprintf(buf, sizeof(buf), "per_cpu/cpu%d/trace_pipe_raw", fra->cpu); + + trace_file = get_tracing_file(buf); + if (!trace_file) { + pr_err("failed to get trace_pipe_raw\n"); + goto out; + } + + trace_fd = open(trace_file, O_RDONLY); + + put_tracing_file(trace_file); + + if (trace_fd < 0) { + pr_err("failed to open trace_pipe_raw\n"); + goto out; + } + + snprintf(buf, sizeof(buf), "%s.dir/trace-cpu%d.buf", + fra->ftrace->dirname, fra->cpu); + + output_fd = open(buf, O_WRONLY|O_CREAT|O_TRUNC, 0644); + if (output_fd < 0) { + pr_err("failed to open output file\n"); + goto out_close; + } + + fra->state = RECORD_STATE__READY; + + /* + * block all signals but SIGUSR2. + * It'll be used to unblock a recorder to finish. + */ + sigfillset(&sigmask); + sigdelset(&sigmask, SIGUSR2); + pthread_sigmask(SIG_SETMASK, &sigmask,NULL); + + signal(SIGUSR2, sig_dummy_handler); + + fcntl(trace_fd, F_SETFL, O_NONBLOCK); + + /* Now I'm ready */ + pthread_mutex_lock(&recorder_mutex); + recorder_count++; + pthread_cond_signal(&recorder_ready_cond); + pthread_cond_wait(&recorder_start_cond, &recorder_mutex); + pthread_mutex_unlock(&recorder_mutex); + + pr_debug2("now recording for cpu%d\n", fra->cpu); + + while (!done) { + int n = read(trace_fd, buf, sizeof(buf)); + + if (n < 0) { + if (errno == EINTR || errno == EAGAIN) + goto sleep; + else + break; + } else if (n == 0) { +sleep: + clock_nanosleep(CLOCK_MONOTONIC, 0, &req, NULL); + } else if (write(output_fd, buf, n) != n) + break; + + byte_written += n; + } + + /* read remaining buffer contents */ + while (true) { + int n = read(trace_fd, buf, sizeof(buf)); + + if (n < 0) + goto out_close; + if (n == 0) + break; + if (write(output_fd, buf, n) != n) + goto out_close; + + byte_written += n; + } + fra->state = RECORD_STATE__DONE; + +out_close: + close(trace_fd); out: + if (fra->state == RECORD_STATE__ERROR) { + /* + * We need to update recorder_count in this case also + * in order to prevent deadlocking in the main thread. + */ + pthread_mutex_lock(&recorder_mutex); + recorder_count++; + pthread_cond_signal(&recorder_ready_cond); + pthread_mutex_unlock(&recorder_mutex); + } + return fra; +} + +static void *synthesize_raw_data(struct perf_evsel *evsel) +{ + void *data = NULL; + u32 data_size; + + if (!strcmp(evsel->tp_format->name, "function")) { + struct { + unsigned short common_type; + unsigned char common_flags; + unsigned char common_preempt_count; + int common_pid; + int common_padding; + + unsigned long ip; + unsigned long parent_ip; + } function_format = { + .common_type = evsel->attr.config, + }; + + data_size = sizeof(function_format); + + data = malloc(data_size + sizeof(u32)); + if (data == NULL) + return NULL; + + memcpy(data, &data_size, sizeof(data_size)); + memcpy(data + sizeof(data_size), &function_format, + sizeof(function_format)); + } else if (!strcmp(evsel->tp_format->name, "funcgraph_entry")) { + struct { + unsigned short common_type; + unsigned char common_flags; + unsigned char common_preempt_count; + int common_pid; + int common_padding; + + unsigned long func; + int depth; + } funcgraph_entry_format = { + .common_type = evsel->attr.config, + }; + + data_size = sizeof(funcgraph_entry_format); + + data = malloc(data_size + sizeof(u32)); + if (data == NULL) + return NULL; + + memcpy(data, &data_size, sizeof(data_size)); + memcpy(data + sizeof(data_size), &funcgraph_entry_format, + sizeof(funcgraph_entry_format)); + } + return data; +} + +static int do_ftrace_record(struct perf_ftrace *ftrace) +{ + int i, err, feat; + int perf_fd; + LIST_HEAD(recorders); + struct perf_session *session; + struct ftrace_record_arg *fra, *tmp; + + signal(SIGINT, sig_handler); + signal(SIGUSR1, sig_handler); + signal(SIGCHLD, sig_handler); + + if (setup_tracing_files(ftrace) < 0) + goto out_reset; + + alloc_ftrace_evsel(ftrace); + + perf_fd = create_perf_header(ftrace); + if (perf_fd < 0) { + pr_err("failed to create perf directory\n"); + goto out_reset; + } + + /* just use a dummy session for header recording */ + session = zalloc(sizeof(*session)); + if (session == NULL) { + pr_err("failed to allocate perf session\n"); + goto out_close; + } + session->evlist = ftrace->evlist; + + for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) + perf_header__set_feat(&session->header, feat); + + perf_header__clear_feat(&session->header, HEADER_BUILD_ID); + perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); + perf_header__clear_feat(&session->header, HEADER_PMU_MAPPINGS); + perf_header__clear_feat(&session->header, HEADER_GROUP_DESC); + + err = perf_session__write_header(session, ftrace->evlist, + perf_fd, false); + if (err < 0) { + pr_err("failed to write perf header\n"); + goto out_session; + } + + /* + * We record ftrace's per_cpu buffer so that it'd better having + * corresponding cpu maps anyway. + */ + if (!perf_target__has_cpu(&ftrace->target)) { + struct cpu_map *new_map; + + new_map = cpu_map__new(NULL); + if (new_map == NULL) { + pr_err("failed to create new cpu map\n"); + goto out_session; + } + + /* replace existing cpu map */ + cpu_map__delete(ftrace->evlist->cpus); + ftrace->evlist->cpus = new_map; + } + + for (i = 0; i < cpu_map__nr(ftrace->evlist->cpus); i++) { + fra = malloc(sizeof(*fra)); + if (fra == NULL) { + pr_err("not enough memory!\n"); + goto out_fra; + } + + fra->ftrace = ftrace; + fra->cpu = ftrace->evlist->cpus->map[i]; + fra->state = RECORD_STATE__INIT; + list_add_tail(&fra->node, &recorders); + + err = pthread_create(&fra->id, NULL, + record_ftrace_raw_buffer, fra); + if (err < 0) { + pr_err("failed to create recorder thread\n"); + goto out_fra; + } + } + + /* wait for all recorders ready */ + pthread_mutex_lock(&recorder_mutex); + while (recorder_count != cpu_map__nr(ftrace->evlist->cpus)) + pthread_cond_wait(&recorder_ready_cond, &recorder_mutex); + pthread_mutex_unlock(&recorder_mutex); + + list_for_each_entry(fra, &recorders, node) { + if (fra->state != RECORD_STATE__READY) { + pr_err("cpu%d: failed to start recorder", fra->cpu); + goto out_fra; + } + } + + if (write_tracing_file("tracing_on", "1") < 0) { + pr_err("can't enable tracing\n"); + goto out_fra; + } + + perf_evlist__start_workload(ftrace->evlist); + + pr_debug2("start recording per cpu buffers\n"); + pthread_mutex_lock(&recorder_mutex); + pthread_cond_broadcast(&recorder_start_cond); + pthread_mutex_unlock(&recorder_mutex); + + /* wait for signal/finish */ + pause(); + + if (write_tracing_file("tracing_on", "0") < 0) { + pr_err("can't disable tracing\n"); + goto out_fra; + } + + /* signal recorders to terminate */ + list_for_each_entry(fra, &recorders, node) { + pr_debug2("killing recorder thread for cpu%d\n", fra->cpu); + pthread_kill(fra->id, SIGUSR2); + } + + list_for_each_entry(fra, &recorders, node) + pthread_join(fra->id, NULL); + + /* synthesize sample data */ + list_for_each_entry(fra, &recorders, node) { + struct perf_evsel *evsel = perf_evlist__first(ftrace->evlist); + union perf_event event = { + .sample = { + .header = { + .type = PERF_RECORD_SAMPLE, + .misc = PERF_RECORD_MISC_KERNEL, + .size = sizeof(event.sample.header) + + evsel->sample_size, + }, + }, + }; + struct perf_sample sample = { + .cpu = fra->cpu, + .period = 1, + }; + void *raw_data; + u32 raw_size; + int orig_size; + + if (fra->state != RECORD_STATE__DONE) { + pr_warning("recorder failed for some reason on cpu%d\n", + fra->cpu); + continue; + } + + perf_event__synthesize_sample(&event, evsel->attr.sample_type, + &sample, false); + + raw_data = synthesize_raw_data(evsel); + if (raw_data == NULL) { + pr_err("synthesizing raw sample failed\n"); + goto out_fra; + } + + /* + * start of raw data is the size of raw data excluding itself. + */ + raw_size = sizeof(u32) + (*(u32 *) raw_data); + + orig_size = event.sample.header.size; + event.sample.header.size += raw_size; + + err = write(perf_fd, &event.sample, orig_size); + if (err != orig_size) { + pr_err("write error occurred\n"); + free(raw_data); + goto out_fra; + } + + err = write(perf_fd, raw_data, raw_size); + free(raw_data); + + if (err != (int)raw_size) { + pr_err("write error occurred\n"); + goto out_fra; + } + + session->header.data_size += event.sample.header.size; + } + + perf_session__write_header(session, ftrace->evlist, perf_fd, true); + +out_fra: + list_for_each_entry_safe(fra, tmp, &recorders, node) { + list_del(&fra->node); + free(fra); + } +out_session: + free(session); +out_close: + close(perf_fd); +out_reset: + reset_tracing_files(ftrace); return done ? 0 : -1; } @@ -326,6 +862,74 @@ out: return ret; } +static int +__cmd_ftrace_record(struct perf_ftrace *ftrace, int argc, const char **argv) +{ + int ret = -1; + const char * const record_usage[] = { + "perf ftrace record [] []", + "perf ftrace record [] -- []", + NULL + }; + const struct option record_options[] = { + OPT_STRING('t', "tracer", &ftrace->tracer, "tracer", + "tracer to use: function_graph or function"), + OPT_STRING('p', "pid", &ftrace->target.pid, "pid", + "trace on existing process id"), + OPT_INCR('v', "verbose", &verbose, + "be more verbose"), + OPT_BOOLEAN('a', "all-cpus", &ftrace->target.system_wide, + "system-wide collection from all CPUs"), + OPT_STRING('C', "cpu", &ftrace->target.cpu_list, "cpu", + "list of cpus to monitor"), + OPT_STRING('o', "output", &ftrace->dirname, "dirname", + "input directory name to use (default: perf.data)"), + OPT_END() + }; + + argc = parse_options(argc, argv, record_options, record_usage, + PARSE_OPT_STOP_AT_NON_OPTION); + if (!argc && perf_target__none(&ftrace->target)) + usage_with_options(record_usage, record_options); + + ret = perf_target__validate(&ftrace->target); + if (ret) { + char errbuf[512]; + + perf_target__strerror(&ftrace->target, ret, errbuf, 512); + pr_err("%s\n", errbuf); + return -EINVAL; + } + + ftrace->evlist = perf_evlist__new(); + if (ftrace->evlist == NULL) + return -ENOMEM; + + ret = perf_evlist__create_maps(ftrace->evlist, &ftrace->target); + if (ret < 0) + goto out; + + if (ftrace->tracer == NULL) + ftrace->tracer = DEFAULT_TRACER; + + if (ftrace->dirname == NULL) + ftrace->dirname = DEFAULT_DIRNAME; + + if (argc && perf_evlist__prepare_workload(ftrace->evlist, + &ftrace->target, + argv, false, true) < 0) + goto out_maps; + + ret = do_ftrace_record(ftrace); + +out_maps: + perf_evlist__delete_maps(ftrace->evlist); +out: + perf_evlist__delete(ftrace->evlist); + + return ret; +} + int cmd_ftrace(int argc, const char **argv, const char *prefix __maybe_unused) { int ret; @@ -333,8 +937,8 @@ int cmd_ftrace(int argc, const char **argv, const char *prefix __maybe_unused) .target = { .uid = UINT_MAX, }, }; const char * const ftrace_usage[] = { - "perf ftrace {live} [] []", - "perf ftrace {live} [] -- []", + "perf ftrace {live|record} [] []", + "perf ftrace {live|record} [] -- []", NULL }; const struct option ftrace_options[] = { @@ -353,6 +957,8 @@ int cmd_ftrace(int argc, const char **argv, const char *prefix __maybe_unused) if (strcmp(argv[0], "live") == 0) { ret = __cmd_ftrace_live(&ftrace, argc, argv); + } else if (strncmp(argv[0], "rec", 3) == 0) { + ret = __cmd_ftrace_record(&ftrace, argc, argv); } else { usage_with_options(ftrace_usage, ftrace_options); } -- 1.7.11.7 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/