Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752446AbcKRA0l (ORCPT ); Thu, 17 Nov 2016 19:26:41 -0500 Received: from mail-wm0-f67.google.com ([74.125.82.67]:36783 "EHLO mail-wm0-f67.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751243AbcKRA0e (ORCPT ); Thu, 17 Nov 2016 19:26:34 -0500 From: Alexis Berlemont To: linux-kernel@vger.kernel.org Cc: Alexis Berlemont , peterz@infradead.org, mingo@redhat.com, acme@kernel.org, alexander.shishkin@linux.intel.com Subject: [PATCH v3 2/2] perf: add page fault duration measures in perf trace Date: Fri, 18 Nov 2016 01:21:38 +0100 Message-Id: <20161118002138.29150-3-alexis.berlemont@gmail.com> X-Mailer: git-send-email 2.10.2 In-Reply-To: <20161118002138.29150-1-alexis.berlemont@gmail.com> References: <20161118002138.29150-1-alexis.berlemont@gmail.com> In-Reply-To: <20161027083714.GF3568@worktop.programming.kicks-ass.net> References: <20161027083714.GF3568@worktop.programming.kicks-ass.net> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 11194 Lines: 360 Thanks to the tracepoints exceptions:page_fault_kernel_exit and exceptions:page_fault_user_exit, the subcommand perf-trace can now print the duration of page-fault exception handling just like it is done for syscalls. Here is the result; the page fault durations are indicated in parenthesis (like syscalls): Here is an example; the durations are indicated in parenthesis just like with syscalls: 0.053 ( 0.053 ms): ls/152 minfault [__clear_user+0x25] => 0x694e7e (?k) 0.114 ( 0.027 ms): ls/152 minfault [__clear_user+0x25] => 0x7fb912eaafc0 (?k) 0.201 ( 0.030 ms): ls/152 minfault [_start+0x0] => /lib/ld-2.23.so@0xc70 (x.) ... Signed-off-by: Alexis Berlemont --- tools/perf/Documentation/perf-trace.txt | 4 +- tools/perf/builtin-trace.c | 231 ++++++++++++++++++++++++++++---- 2 files changed, 208 insertions(+), 27 deletions(-) diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 781b019..53c103c 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -117,7 +117,9 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. -F=[all|min|maj]:: --pf=[all|min|maj]:: Trace pagefaults. Optionally, you can specify whether you want minor, - major or all pagefaults. Default value is maj. + major or all pagefaults. Default value is maj. Durations of + page-fault handling will be printed if possible (need for some + architecture-dependent tracepoints). --syscalls:: Trace system calls. This options is enabled by default. diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 5f45166..f0957a8 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -861,6 +861,8 @@ struct thread_trace { } paths; struct intlist *syscall_stats; + u64 pgfault_entry_time; + char *pgfault_entry_str; }; static struct thread_trace *thread_trace__new(void) @@ -1797,21 +1799,56 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, return 0; } -static void print_location(FILE *f, struct perf_sample *sample, - struct addr_location *al, - bool print_dso, bool print_sym) +static int trace__pgfault_enter(struct trace *trace, + struct perf_evsel *evsel __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample) { + struct thread *thread; + struct thread_trace *ttrace; + int err = -1; + + thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); + if (!thread) + goto out; + + ttrace = thread__trace(thread, trace->output); + if (ttrace == NULL) + goto out_put; + + ttrace->pgfault_entry_time = sample->time; + +out: + err = 0; +out_put: + thread__put(thread); + return err; +} + +static size_t scnprintf_location(char *bf, size_t size, + struct perf_sample *sample, + struct addr_location *al, + bool print_dso, bool print_sym) +{ + size_t printed = 0; if ((verbose || print_dso) && al->map) - fprintf(f, "%s@", al->map->dso->long_name); + printed += scnprintf(bf + printed, + size - printed, "%s@", al->map->dso->long_name); if ((verbose || print_sym) && al->sym) - fprintf(f, "%s+0x%" PRIx64, al->sym->name, - al->addr - al->sym->start); + printed += scnprintf(bf + printed, + size - printed, + "%s+0x%" PRIx64, al->sym->name, + al->addr - al->sym->start); else if (al->map) - fprintf(f, "0x%" PRIx64, al->addr); + printed += scnprintf(bf + printed, + size - printed, "0x%" PRIx64, al->addr); else - fprintf(f, "0x%" PRIx64, sample->addr); + printed += scnprintf(bf + printed, + size - printed, "0x%" PRIx64, sample->addr); + + return printed; } static int trace__pgfault(struct trace *trace, @@ -1823,13 +1860,22 @@ static int trace__pgfault(struct trace *trace, struct addr_location al; char map_type = 'd'; struct thread_trace *ttrace; + size_t printed = 0; int err = -1; int callchain_ret = 0; thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); + if (!thread) + goto out; - if (sample->callchain) { - callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor); + ttrace = thread__trace(thread, trace->output); + if (ttrace == NULL) + goto out_put; + + if (!ttrace->pgfault_entry_time && sample->callchain) { + callchain_ret = + trace__resolve_callchain(trace, evsel, + sample, &callchain_cursor); if (callchain_ret == 0) { if (callchain_cursor.nr < trace->min_stack) goto out_put; @@ -1837,10 +1883,6 @@ static int trace__pgfault(struct trace *trace, } } - ttrace = thread__trace(thread, trace->output); - if (ttrace == NULL) - goto out_put; - if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ) ttrace->pfmaj++; else @@ -1849,18 +1891,27 @@ static int trace__pgfault(struct trace *trace, if (trace->summary_only) goto out; + if (ttrace->pgfault_entry_str == NULL) { + ttrace->pgfault_entry_str = malloc(trace__entry_str_size); + if (!ttrace->pgfault_entry_str) + goto out_put; + ttrace->pgfault_entry_str[0] = '\0'; + } + thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION, sample->ip, &al); - trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output); - - fprintf(trace->output, "%sfault [", - evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ? - "maj" : "min"); + printed += scnprintf(ttrace->pgfault_entry_str + printed, + trace__entry_str_size - printed, "%sfault [", + evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ? + "maj" : "min"); - print_location(trace->output, sample, &al, false, true); + printed += scnprintf_location(ttrace->pgfault_entry_str + printed, + trace__entry_str_size - printed, + sample, &al, false, true); - fprintf(trace->output, "] => "); + printed += scnprintf(ttrace->pgfault_entry_str + printed, + trace__entry_str_size - printed, "] => "); thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE, sample->addr, &al); @@ -1875,14 +1926,97 @@ static int trace__pgfault(struct trace *trace, map_type = '?'; } - print_location(trace->output, sample, &al, true, false); + printed += scnprintf_location(ttrace->pgfault_entry_str + printed, + trace__entry_str_size - printed, + sample, &al, true, false); + + printed += scnprintf(ttrace->pgfault_entry_str + printed, + trace__entry_str_size - printed, + " (%c%c)", map_type, al.level); + + if (!ttrace->pgfault_entry_time) { + trace__fprintf_entry_head(trace, thread, + 0, sample->time, trace->output); + fprintf(trace->output, "%-70s\n", ttrace->pgfault_entry_str); + + if (callchain_ret > 0) + trace__fprintf_callchain(trace, sample); + else if (callchain_ret < 0) + pr_err("Problem processing %s callchain, skipping...\n", + perf_evsel__name(evsel)); + } + +out: + err = 0; +out_put: + thread__put(thread); + return err; +} + +static int trace__pgfault_exit(struct trace *trace, struct perf_evsel *evsel, + union perf_event *event __maybe_unused, + struct perf_sample *sample) +{ + struct thread *thread; + struct thread_trace *ttrace; + u64 duration = 0; + int err = -1, callchain_ret = 0; + + thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); + + ttrace = thread__priv(thread); + if (!ttrace) + goto out_put; + + if (!ttrace->pgfault_entry_time) + goto out; + + /* + * The check below is necessary for a specific case: it is + * possible to enable only major (or minor) page faults + * software events but it is impossible to filter page-fault + * related tracepoints according to the major / minor + * characteristic. + */ + + if (!ttrace->pgfault_entry_str || + strlen(ttrace->pgfault_entry_str) == 0) + goto out; + + if (sample->callchain) { + callchain_ret = + trace__resolve_callchain(trace, evsel, + sample, &callchain_cursor); + if (callchain_ret == 0) { + if (callchain_cursor.nr < trace->min_stack) + goto out; + callchain_ret = 1; + } + } + + if (ttrace->pgfault_entry_time) { + duration = sample->time - ttrace->pgfault_entry_time; + if (trace__filter_duration(trace, duration)) + goto out; + } + + trace__fprintf_entry_head(trace, thread, + duration, sample->time, trace->output); - fprintf(trace->output, " (%c%c)\n", map_type, al.level); + fprintf(trace->output, "%-70s\n", ttrace->pgfault_entry_str); + + /* + * Once the string is printed; clear it just in case the next + * software events is filtered (because of major / minor. + */ + ttrace->pgfault_entry_str[0] = '\0'; if (callchain_ret > 0) trace__fprintf_callchain(trace, sample); else if (callchain_ret < 0) - pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel)); + pr_err("Problem processing %s callchain, skipping...\n", + perf_evsel__name(evsel)); + out: err = 0; out_put: @@ -2062,6 +2196,43 @@ static struct perf_evsel *perf_evsel__new_pgfault(u64 config) return evsel; } +static int trace__add_pgfault_newtp(struct trace *trace) +{ + int err = -1; + struct perf_evlist *evlist = trace->evlist; + + /* + * The tracepoints exceptions::page_fault_* are not available + * on all architecture.; so, we consider it is not an error if + * the 1st one's initialization is KO... + */ + err = perf_evlist__add_newtp(evlist, "exceptions", + "page_fault_kernel_exit", trace__pgfault_exit); + if (err < 0) + return 0; + + /* + * ...however, if the 2nd, 3rd or 4th init is KO; there is + * definitely something wrong not related with the absence of + * tracepoints + */ + err = perf_evlist__add_newtp(evlist, "exceptions", + "page_fault_user_exit", trace__pgfault_exit); + if (err < 0) + return -1; + + err = perf_evlist__add_newtp(evlist, "exceptions", + "page_fault_kernel_entry", + trace__pgfault_enter); + if (err < 0) + return -1; + + err = perf_evlist__add_newtp(evlist, "exceptions", + "page_fault_user_entry", trace__pgfault_enter); + + return err; +} + static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample) { const u32 type = event->header.type; @@ -2193,9 +2364,12 @@ static int trace__run(struct trace *trace, int argc, const char **argv) perf_evlist__add(evlist, pgfault_min); } + if (trace->trace_pgfaults && trace__add_pgfault_newtp(trace)) + goto out_error_pgfault_tp; + if (trace->sched && - perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", - trace__sched_stat_runtime)) + perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", + trace__sched_stat_runtime)) goto out_error_sched_stat_runtime; err = perf_evlist__create_maps(evlist, &trace->opts.target); @@ -2399,6 +2573,11 @@ static int trace__run(struct trace *trace, int argc, const char **argv) tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)"); goto out_error; +out_error_pgfault_tp: + tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), + "exceptions", "page_fault_(kernel|user)"); + goto out_error; + out_error_mmap: perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf)); goto out_error; -- 2.10.2