Linus,
Please pull the latest perf-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus
HEAD: dbc33f7016747bfce64f3d1e3af63f1251fcbf85 perf/x86: Fix uncore PCI fixed counter handling
Various fixes. (The -g perf report lockup you reported is only partially
addressed; patches that fix the excessive runtime are still being worked
on.)
Thanks,
Ingo
------------------>
Adrian Hunter (4):
perf tools: Add test for parsing with no sample_id_all bit
perf evlist: Fix parsing with no sample_id_all bit set
perf evlist: Fix id pos in perf_evlist__open()
perf kvm: Fix sample_type manipulation
Arnaldo Carvalho de Melo (4):
perf trace: Check control+C more often
perf hists: Fix formatting of long symbol names
perf trace: Handle perf.data files with no tracepoints
perf: Fix up MMAP2 buffer space reservation
Jiri Olsa (1):
perf session: Separate progress bar update when processing events
Kyle McMartin (1):
perf trace: Check if MAP_32BIT is defined
Oleg Nesterov (1):
uprobes: Fix utask->depth accounting in handle_trampoline()
Stephane Eranian (3):
perf tools: Add attr->mmap2 support
perf/x86: Add constraint for IVB CYCLE_ACTIVITY:CYCLES_LDM_PENDING
perf/x86: Fix uncore PCI fixed counter handling
arch/x86/kernel/cpu/perf_event_intel.c | 1 +
arch/x86/kernel/cpu/perf_event_intel_uncore.c | 6 +-
kernel/events/core.c | 1 +
kernel/events/uprobes.c | 4 +-
tools/perf/Makefile | 3 +-
tools/perf/builtin-annotate.c | 1 +
tools/perf/builtin-inject.c | 15 ++++
tools/perf/builtin-kvm.c | 18 ++---
tools/perf/builtin-mem.c | 1 +
tools/perf/builtin-report.c | 1 +
tools/perf/builtin-script.c | 1 +
tools/perf/builtin-trace.c | 5 ++
tools/perf/tests/builtin-test.c | 4 +
tools/perf/tests/parse-no-sample-id-all.c | 108 ++++++++++++++++++++++++++
tools/perf/tests/perf-record.c | 15 +++-
tools/perf/tests/tests.h | 1 +
tools/perf/ui/stdio/hist.c | 23 ++++--
tools/perf/util/build-id.c | 1 +
tools/perf/util/event.c | 56 ++++++++++---
tools/perf/util/event.h | 19 +++++
tools/perf/util/evlist.c | 21 ++++-
tools/perf/util/evsel.c | 16 +++-
tools/perf/util/header.c | 3 +
tools/perf/util/machine.c | 53 ++++++++++++-
tools/perf/util/machine.h | 1 +
tools/perf/util/map.c | 8 +-
tools/perf/util/map.h | 8 +-
tools/perf/util/session.c | 68 ++++++++--------
tools/perf/util/tool.h | 1 +
29 files changed, 381 insertions(+), 82 deletions(-)
create mode 100644 tools/perf/tests/parse-no-sample-id-all.c
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 0abf674..c62d883 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -124,6 +124,7 @@ static struct event_constraint intel_ivb_event_constraints[] __read_mostly =
INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), /* L1D_PEND_MISS.PENDING */
INTEL_UEVENT_CONSTRAINT(0x0279, 0xf), /* IDQ.EMTPY */
INTEL_UEVENT_CONSTRAINT(0x019c, 0xf), /* IDQ_UOPS_NOT_DELIVERED.CORE */
+ INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_LDM_PENDING */
INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */
INTEL_UEVENT_CONSTRAINT(0x06a3, 0xf), /* CYCLE_ACTIVITY.STALLS_LDM_PENDING */
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index fd8011e..8ed4458 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -2808,7 +2808,7 @@ uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *eve
return c;
}
- if (event->hw.config == ~0ULL)
+ if (event->attr.config == UNCORE_FIXED_EVENT)
return &constraint_fixed;
if (type->constraints) {
@@ -3112,7 +3112,9 @@ static int uncore_pmu_event_init(struct perf_event *event)
*/
if (pmu->type->single_fixed && pmu->pmu_idx > 0)
return -EINVAL;
- hwc->config = ~0ULL;
+
+ /* fixed counters have event field hardcoded to zero */
+ hwc->config = 0ULL;
} else {
hwc->config = event->attr.config & pmu->type->event_mask;
if (pmu->type->ops->hw_config) {
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 2207efc..dd236b6 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5039,6 +5039,7 @@ static void perf_event_mmap_output(struct perf_event *event,
mmap_event->event_id.header.size += sizeof(mmap_event->maj);
mmap_event->event_id.header.size += sizeof(mmap_event->min);
mmap_event->event_id.header.size += sizeof(mmap_event->ino);
+ mmap_event->event_id.header.size += sizeof(mmap_event->ino_generation);
}
perf_event_header__init_id(&mmap_event->event_id.header, &sample, event);
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index f356974..ad8e1bd 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1682,12 +1682,10 @@ static bool handle_trampoline(struct pt_regs *regs)
tmp = ri;
ri = ri->next;
kfree(tmp);
+ utask->depth--;
if (!chained)
break;
-
- utask->depth--;
-
BUG_ON(!ri);
}
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index c5dc1ad..3a0ff7f 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -394,6 +394,8 @@ ifeq ($(ARCH),x86)
LIB_OBJS += $(OUTPUT)tests/perf-time-to-tsc.o
endif
LIB_OBJS += $(OUTPUT)tests/code-reading.o
+LIB_OBJS += $(OUTPUT)tests/sample-parsing.o
+LIB_OBJS += $(OUTPUT)tests/parse-no-sample-id-all.o
BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o
BUILTIN_OBJS += $(OUTPUT)builtin-bench.o
@@ -439,7 +441,6 @@ PERFLIBS = $(LIB_FILE) $(LIBLK) $(LIBTRACEEVENT)
ifneq ($(OUTPUT),)
CFLAGS += -I$(OUTPUT)
endif
-LIB_OBJS += $(OUTPUT)tests/sample-parsing.o
ifdef NO_LIBELF
EXTLIBS := $(filter-out -lelf,$(EXTLIBS))
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index f988d38..5ebd0c3 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -277,6 +277,7 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
.tool = {
.sample = process_sample_event,
.mmap = perf_event__process_mmap,
+ .mmap2 = perf_event__process_mmap2,
.comm = perf_event__process_comm,
.exit = perf_event__process_exit,
.fork = perf_event__process_fork,
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 9b336fd..423875c 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -123,6 +123,19 @@ static int perf_event__repipe_mmap(struct perf_tool *tool,
return err;
}
+static int perf_event__repipe_mmap2(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ int err;
+
+ err = perf_event__process_mmap2(tool, event, sample, machine);
+ perf_event__repipe(tool, event, sample, machine);
+
+ return err;
+}
+
static int perf_event__repipe_fork(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
@@ -339,6 +352,7 @@ static int __cmd_inject(struct perf_inject *inject)
if (inject->build_ids || inject->sched_stat) {
inject->tool.mmap = perf_event__repipe_mmap;
+ inject->tool.mmap2 = perf_event__repipe_mmap2;
inject->tool.fork = perf_event__repipe_fork;
inject->tool.tracing_data = perf_event__repipe_tracing_data;
}
@@ -390,6 +404,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
.tool = {
.sample = perf_event__repipe_sample,
.mmap = perf_event__repipe,
+ .mmap2 = perf_event__repipe,
.comm = perf_event__repipe,
.fork = perf_event__repipe,
.exit = perf_event__repipe,
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 47b3540..935d522 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -1165,16 +1165,16 @@ static int kvm_live_open_events(struct perf_kvm_stat *kvm)
struct perf_event_attr *attr = &pos->attr;
/* make sure these *are* set */
- attr->sample_type |= PERF_SAMPLE_TID;
- attr->sample_type |= PERF_SAMPLE_TIME;
- attr->sample_type |= PERF_SAMPLE_CPU;
- attr->sample_type |= PERF_SAMPLE_RAW;
+ perf_evsel__set_sample_bit(pos, TID);
+ perf_evsel__set_sample_bit(pos, TIME);
+ perf_evsel__set_sample_bit(pos, CPU);
+ perf_evsel__set_sample_bit(pos, RAW);
/* make sure these are *not*; want as small a sample as possible */
- attr->sample_type &= ~PERF_SAMPLE_PERIOD;
- attr->sample_type &= ~PERF_SAMPLE_IP;
- attr->sample_type &= ~PERF_SAMPLE_CALLCHAIN;
- attr->sample_type &= ~PERF_SAMPLE_ADDR;
- attr->sample_type &= ~PERF_SAMPLE_READ;
+ perf_evsel__reset_sample_bit(pos, PERIOD);
+ perf_evsel__reset_sample_bit(pos, IP);
+ perf_evsel__reset_sample_bit(pos, CALLCHAIN);
+ perf_evsel__reset_sample_bit(pos, ADDR);
+ perf_evsel__reset_sample_bit(pos, READ);
attr->mmap = 0;
attr->comm = 0;
attr->task = 0;
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 791b432..253133a 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -190,6 +190,7 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
.tool = {
.sample = process_sample_event,
.mmap = perf_event__process_mmap,
+ .mmap2 = perf_event__process_mmap2,
.comm = perf_event__process_comm,
.lost = perf_event__process_lost,
.fork = perf_event__process_fork,
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 9725aa3..8e50d8d 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -744,6 +744,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
.tool = {
.sample = process_sample_event,
.mmap = perf_event__process_mmap,
+ .mmap2 = perf_event__process_mmap2,
.comm = perf_event__process_comm,
.exit = perf_event__process_exit,
.fork = perf_event__process_fork,
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 93a34ce..7f31a3d 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -542,6 +542,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
static struct perf_tool perf_script = {
.sample = process_sample_event,
.mmap = perf_event__process_mmap,
+ .mmap2 = perf_event__process_mmap2,
.comm = perf_event__process_comm,
.exit = perf_event__process_exit,
.fork = perf_event__process_fork,
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index b6f0725..f5aa637 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -100,7 +100,9 @@ static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
P_MMAP_FLAG(SHARED);
P_MMAP_FLAG(PRIVATE);
+#ifdef MAP_32BIT
P_MMAP_FLAG(32BIT);
+#endif
P_MMAP_FLAG(ANONYMOUS);
P_MMAP_FLAG(DENYWRITE);
P_MMAP_FLAG(EXECUTABLE);
@@ -994,6 +996,9 @@ again:
handler = evsel->handler.func;
handler(trace, evsel, &sample);
+
+ if (done)
+ goto out_unmap_evlist;
}
}
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 8bbeba3..1e67437 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -112,6 +112,10 @@ static struct test {
.func = test__keep_tracking,
},
{
+ .desc = "Test parsing with no sample_id_all bit set",
+ .func = test__parse_no_sample_id_all,
+ },
+ {
.func = NULL,
},
};
diff --git a/tools/perf/tests/parse-no-sample-id-all.c b/tools/perf/tests/parse-no-sample-id-all.c
new file mode 100644
index 0000000..e117b6c
--- /dev/null
+++ b/tools/perf/tests/parse-no-sample-id-all.c
@@ -0,0 +1,108 @@
+#include <sys/types.h>
+#include <stddef.h>
+
+#include "tests.h"
+
+#include "event.h"
+#include "evlist.h"
+#include "header.h"
+#include "util.h"
+
+static int process_event(struct perf_evlist **pevlist, union perf_event *event)
+{
+ struct perf_sample sample;
+
+ if (event->header.type == PERF_RECORD_HEADER_ATTR) {
+ if (perf_event__process_attr(NULL, event, pevlist)) {
+ pr_debug("perf_event__process_attr failed\n");
+ return -1;
+ }
+ return 0;
+ }
+
+ if (event->header.type >= PERF_RECORD_USER_TYPE_START)
+ return -1;
+
+ if (!*pevlist)
+ return -1;
+
+ if (perf_evlist__parse_sample(*pevlist, event, &sample)) {
+ pr_debug("perf_evlist__parse_sample failed\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int process_events(union perf_event **events, size_t count)
+{
+ struct perf_evlist *evlist = NULL;
+ int err = 0;
+ size_t i;
+
+ for (i = 0; i < count && !err; i++)
+ err = process_event(&evlist, events[i]);
+
+ if (evlist)
+ perf_evlist__delete(evlist);
+
+ return err;
+}
+
+struct test_attr_event {
+ struct attr_event attr;
+ u64 id;
+};
+
+/**
+ * test__parse_no_sample_id_all - test parsing with no sample_id_all bit set.
+ *
+ * This function tests parsing data produced on kernels that do not support the
+ * sample_id_all bit. Without the sample_id_all bit, non-sample events (such as
+ * mmap events) do not have an id sample appended, and consequently logic
+ * designed to determine the id will not work. That case happens when there is
+ * more than one selected event, so this test processes three events: two
+ * attributes representing the selected events and one mmap event.
+ *
+ * Return: %0 on success, %-1 if the test fails.
+ */
+int test__parse_no_sample_id_all(void)
+{
+ int err;
+
+ struct test_attr_event event1 = {
+ .attr = {
+ .header = {
+ .type = PERF_RECORD_HEADER_ATTR,
+ .size = sizeof(struct test_attr_event),
+ },
+ },
+ .id = 1,
+ };
+ struct test_attr_event event2 = {
+ .attr = {
+ .header = {
+ .type = PERF_RECORD_HEADER_ATTR,
+ .size = sizeof(struct test_attr_event),
+ },
+ },
+ .id = 2,
+ };
+ struct mmap_event event3 = {
+ .header = {
+ .type = PERF_RECORD_MMAP,
+ .size = sizeof(struct mmap_event),
+ },
+ };
+ union perf_event *events[] = {
+ (union perf_event *)&event1,
+ (union perf_event *)&event2,
+ (union perf_event *)&event3,
+ };
+
+ err = process_events(events, ARRAY_SIZE(events));
+ if (err)
+ return -1;
+
+ return 0;
+}
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index 72d8881..b8a7056 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -50,7 +50,7 @@ int test__PERF_RECORD(void)
struct perf_sample sample;
const char *cmd = "sleep";
const char *argv[] = { cmd, "1", NULL, };
- char *bname;
+ char *bname, *mmap_filename;
u64 prev_time = 0;
bool found_cmd_mmap = false,
found_libc_mmap = false,
@@ -212,6 +212,7 @@ int test__PERF_RECORD(void)
if ((type == PERF_RECORD_COMM ||
type == PERF_RECORD_MMAP ||
+ type == PERF_RECORD_MMAP2 ||
type == PERF_RECORD_FORK ||
type == PERF_RECORD_EXIT) &&
(pid_t)event->comm.pid != evlist->workload.pid) {
@@ -220,7 +221,8 @@ int test__PERF_RECORD(void)
}
if ((type == PERF_RECORD_COMM ||
- type == PERF_RECORD_MMAP) &&
+ type == PERF_RECORD_MMAP ||
+ type == PERF_RECORD_MMAP2) &&
event->comm.pid != event->comm.tid) {
pr_debug("%s with different pid/tid!\n", name);
++errs;
@@ -236,7 +238,12 @@ int test__PERF_RECORD(void)
case PERF_RECORD_EXIT:
goto found_exit;
case PERF_RECORD_MMAP:
- bname = strrchr(event->mmap.filename, '/');
+ mmap_filename = event->mmap.filename;
+ goto check_bname;
+ case PERF_RECORD_MMAP2:
+ mmap_filename = event->mmap2.filename;
+ check_bname:
+ bname = strrchr(mmap_filename, '/');
if (bname != NULL) {
if (!found_cmd_mmap)
found_cmd_mmap = !strcmp(bname + 1, cmd);
@@ -245,7 +252,7 @@ int test__PERF_RECORD(void)
if (!found_ld_mmap)
found_ld_mmap = !strncmp(bname + 1, "ld", 2);
} else if (!found_vdso_mmap)
- found_vdso_mmap = !strcmp(event->mmap.filename, "[vdso]");
+ found_vdso_mmap = !strcmp(mmap_filename, "[vdso]");
break;
case PERF_RECORD_SAMPLE:
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index c048b58..e0ac713 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -39,5 +39,6 @@ int test__perf_time_to_tsc(void);
int test__code_reading(void);
int test__sample_parsing(void);
int test__keep_tracking(void);
+int test__parse_no_sample_id_all(void);
#endif /* TESTS_H */
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index 5b4fb33..194e2f4 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -350,9 +350,9 @@ static int hist_entry__period_snprintf(struct perf_hpp *hpp,
}
static int hist_entry__fprintf(struct hist_entry *he, size_t size,
- struct hists *hists, FILE *fp)
+ struct hists *hists,
+ char *bf, size_t bfsz, FILE *fp)
{
- char bf[512];
int ret;
struct perf_hpp hpp = {
.buf = bf,
@@ -360,8 +360,8 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size,
};
bool color = !symbol_conf.field_sep;
- if (size == 0 || size > sizeof(bf))
- size = hpp.size = sizeof(bf);
+ if (size == 0 || size > bfsz)
+ size = hpp.size = bfsz;
ret = hist_entry__period_snprintf(&hpp, he, color);
hist_entry__sort_snprintf(he, bf + ret, size - ret, hists);
@@ -392,6 +392,8 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
.ptr = hists_to_evsel(hists),
};
bool first = true;
+ size_t linesz;
+ char *line = NULL;
init_rem_hits();
@@ -479,6 +481,13 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
goto out;
print_entries:
+ linesz = hists__sort_list_width(hists) + 3 + 1;
+ line = malloc(linesz);
+ if (line == NULL) {
+ ret = -1;
+ goto out;
+ }
+
for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
float percent = h->stat.period * 100.0 /
@@ -490,10 +499,10 @@ print_entries:
if (percent < min_pcnt)
continue;
- ret += hist_entry__fprintf(h, max_cols, hists, fp);
+ ret += hist_entry__fprintf(h, max_cols, hists, line, linesz, fp);
if (max_rows && ++nr_rows >= max_rows)
- goto out;
+ break;
if (h->ms.map == NULL && verbose > 1) {
__map_groups__fprintf_maps(&h->thread->mg,
@@ -501,6 +510,8 @@ print_entries:
fprintf(fp, "%.10s end\n", graph_dotted_line);
}
}
+
+ free(line);
out:
free(rem_sq_bracket);
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index fb58409..7ded71d 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -67,6 +67,7 @@ static int perf_event__exit_del_thread(struct perf_tool *tool __maybe_unused,
struct perf_tool build_id__mark_dso_hit_ops = {
.sample = build_id__mark_dso_hit,
.mmap = perf_event__process_mmap,
+ .mmap2 = perf_event__process_mmap2,
.fork = perf_event__process_fork,
.exit = perf_event__exit_del_thread,
.attr = perf_event__process_attr,
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 8d51f21..9b393e7 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -11,6 +11,7 @@
static const char *perf_event__names[] = {
[0] = "TOTAL",
[PERF_RECORD_MMAP] = "MMAP",
+ [PERF_RECORD_MMAP2] = "MMAP2",
[PERF_RECORD_LOST] = "LOST",
[PERF_RECORD_COMM] = "COMM",
[PERF_RECORD_EXIT] = "EXIT",
@@ -186,7 +187,7 @@ static int perf_event__synthesize_mmap_events(struct perf_tool *tool,
return -1;
}
- event->header.type = PERF_RECORD_MMAP;
+ event->header.type = PERF_RECORD_MMAP2;
/*
* Just like the kernel, see __perf_event_mmap in kernel/perf_event.c
*/
@@ -197,7 +198,9 @@ static int perf_event__synthesize_mmap_events(struct perf_tool *tool,
char prot[5];
char execname[PATH_MAX];
char anonstr[] = "//anon";
+ unsigned int ino;
size_t size;
+ ssize_t n;
if (fgets(bf, sizeof(bf), fp) == NULL)
break;
@@ -206,9 +209,16 @@ static int perf_event__synthesize_mmap_events(struct perf_tool *tool,
strcpy(execname, "");
/* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */
- sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %*x:%*x %*u %s\n",
- &event->mmap.start, &event->mmap.len, prot,
- &event->mmap.pgoff, execname);
+ n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %x:%x %u %s\n",
+ &event->mmap2.start, &event->mmap2.len, prot,
+ &event->mmap2.pgoff, &event->mmap2.maj,
+ &event->mmap2.min,
+ &ino, execname);
+
+ event->mmap2.ino = (u64)ino;
+
+ if (n != 8)
+ continue;
if (prot[2] != 'x')
continue;
@@ -217,15 +227,15 @@ static int perf_event__synthesize_mmap_events(struct perf_tool *tool,
strcpy(execname, anonstr);
size = strlen(execname) + 1;
- memcpy(event->mmap.filename, execname, size);
+ memcpy(event->mmap2.filename, execname, size);
size = PERF_ALIGN(size, sizeof(u64));
- event->mmap.len -= event->mmap.start;
- event->mmap.header.size = (sizeof(event->mmap) -
- (sizeof(event->mmap.filename) - size));
- memset(event->mmap.filename + size, 0, machine->id_hdr_size);
- event->mmap.header.size += machine->id_hdr_size;
- event->mmap.pid = tgid;
- event->mmap.tid = pid;
+ event->mmap2.len -= event->mmap.start;
+ event->mmap2.header.size = (sizeof(event->mmap2) -
+ (sizeof(event->mmap2.filename) - size));
+ memset(event->mmap2.filename + size, 0, machine->id_hdr_size);
+ event->mmap2.header.size += machine->id_hdr_size;
+ event->mmap2.pid = tgid;
+ event->mmap2.tid = pid;
if (process(tool, event, &synth_sample, machine) != 0) {
rc = -1;
@@ -527,6 +537,17 @@ size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp)
event->mmap.len, event->mmap.pgoff, event->mmap.filename);
}
+size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp)
+{
+ return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64
+ " %02x:%02x %"PRIu64" %"PRIu64"]: %s\n",
+ event->mmap2.pid, event->mmap2.tid, event->mmap2.start,
+ event->mmap2.len, event->mmap2.pgoff, event->mmap2.maj,
+ event->mmap2.min, event->mmap2.ino,
+ event->mmap2.ino_generation,
+ event->mmap2.filename);
+}
+
int perf_event__process_mmap(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
@@ -535,6 +556,14 @@ int perf_event__process_mmap(struct perf_tool *tool __maybe_unused,
return machine__process_mmap_event(machine, event);
}
+int perf_event__process_mmap2(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine)
+{
+ return machine__process_mmap2_event(machine, event);
+}
+
size_t perf_event__fprintf_task(union perf_event *event, FILE *fp)
{
return fprintf(fp, "(%d:%d):(%d:%d)\n",
@@ -574,6 +603,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp)
case PERF_RECORD_MMAP:
ret += perf_event__fprintf_mmap(event, fp);
break;
+ case PERF_RECORD_MMAP2:
+ ret += perf_event__fprintf_mmap2(event, fp);
+ break;
default:
ret += fprintf(fp, "\n");
}
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 93130d8..c67ecc4 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -17,6 +17,19 @@ struct mmap_event {
char filename[PATH_MAX];
};
+struct mmap2_event {
+ struct perf_event_header header;
+ u32 pid, tid;
+ u64 start;
+ u64 len;
+ u64 pgoff;
+ u32 maj;
+ u32 min;
+ u64 ino;
+ u64 ino_generation;
+ char filename[PATH_MAX];
+};
+
struct comm_event {
struct perf_event_header header;
u32 pid, tid;
@@ -159,6 +172,7 @@ struct tracing_data_event {
union perf_event {
struct perf_event_header header;
struct mmap_event mmap;
+ struct mmap2_event mmap2;
struct comm_event comm;
struct fork_event fork;
struct lost_event lost;
@@ -208,6 +222,10 @@ int perf_event__process_mmap(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
+int perf_event__process_mmap2(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
int perf_event__process_fork(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
@@ -238,6 +256,7 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type,
size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_task(union perf_event *event, FILE *fp);
size_t perf_event__fprintf(union perf_event *event, FILE *fp);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index b8727ae..f9f77be 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -64,6 +64,16 @@ void perf_evlist__set_id_pos(struct perf_evlist *evlist)
evlist->is_pos = first->is_pos;
}
+static void perf_evlist__update_id_pos(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+
+ list_for_each_entry(evsel, &evlist->entries, node)
+ perf_evsel__calc_id_pos(evsel);
+
+ perf_evlist__set_id_pos(evlist);
+}
+
static void perf_evlist__purge(struct perf_evlist *evlist)
{
struct perf_evsel *pos, *n;
@@ -446,20 +456,25 @@ static int perf_evlist__event2id(struct perf_evlist *evlist,
static struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
union perf_event *event)
{
+ struct perf_evsel *first = perf_evlist__first(evlist);
struct hlist_head *head;
struct perf_sample_id *sid;
int hash;
u64 id;
if (evlist->nr_entries == 1)
- return perf_evlist__first(evlist);
+ return first;
+
+ if (!first->attr.sample_id_all &&
+ event->header.type != PERF_RECORD_SAMPLE)
+ return first;
if (perf_evlist__event2id(evlist, event, &id))
return NULL;
/* Synthesized events have an id of zero */
if (!id)
- return perf_evlist__first(evlist);
+ return first;
hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
head = &evlist->heads[hash];
@@ -915,6 +930,8 @@ int perf_evlist__open(struct perf_evlist *evlist)
struct perf_evsel *evsel;
int err;
+ perf_evlist__update_id_pos(evlist);
+
list_for_each_entry(evsel, &evlist->entries, node) {
err = perf_evsel__open(evsel, evlist->cpus, evlist->threads);
if (err < 0)
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 3612183..0ce9feb 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -27,6 +27,7 @@
static struct {
bool sample_id_all;
bool exclude_guest;
+ bool mmap2;
} perf_missing_features;
#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
@@ -676,8 +677,9 @@ void perf_evsel__config(struct perf_evsel *evsel,
if (opts->sample_weight)
attr->sample_type |= PERF_SAMPLE_WEIGHT;
- attr->mmap = track;
- attr->comm = track;
+ attr->mmap = track;
+ attr->mmap2 = track && !perf_missing_features.mmap2;
+ attr->comm = track;
/*
* XXX see the function comment above
@@ -1016,6 +1018,8 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
}
fallback_missing_features:
+ if (perf_missing_features.mmap2)
+ evsel->attr.mmap2 = 0;
if (perf_missing_features.exclude_guest)
evsel->attr.exclude_guest = evsel->attr.exclude_host = 0;
retry_sample_id:
@@ -1080,8 +1084,11 @@ try_fallback:
if (err != -EINVAL || cpu > 0 || thread > 0)
goto out_close;
- if (!perf_missing_features.exclude_guest &&
- (evsel->attr.exclude_guest || evsel->attr.exclude_host)) {
+ if (!perf_missing_features.mmap2 && evsel->attr.mmap2) {
+ perf_missing_features.mmap2 = true;
+ goto fallback_missing_features;
+ } else if (!perf_missing_features.exclude_guest &&
+ (evsel->attr.exclude_guest || evsel->attr.exclude_host)) {
perf_missing_features.exclude_guest = true;
goto fallback_missing_features;
} else if (!perf_missing_features.sample_id_all) {
@@ -1925,6 +1932,7 @@ int perf_evsel__fprintf(struct perf_evsel *evsel,
if_print(exclude_hv);
if_print(exclude_idle);
if_print(mmap);
+ if_print(mmap2);
if_print(comm);
if_print(freq);
if_print(inherit_stat);
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index a33197a..26441d0 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1351,6 +1351,9 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp)
fprintf(fp, ", precise_ip = %d", evsel->attr.precise_ip);
+ fprintf(fp, ", attr_mmap2 = %d", evsel->attr.mmap2);
+ fprintf(fp, ", attr_mmap = %d", evsel->attr.mmap);
+ fprintf(fp, ", attr_mmap_data = %d", evsel->attr.mmap_data);
if (evsel->ids) {
fprintf(fp, ", id = {");
for (j = 0, id = evsel->id; j < evsel->ids; j++, id++) {
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 1dca61f..933d14f 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -997,6 +997,54 @@ out_problem:
return -1;
}
+int machine__process_mmap2_event(struct machine *machine,
+ union perf_event *event)
+{
+ u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+ struct thread *thread;
+ struct map *map;
+ enum map_type type;
+ int ret = 0;
+
+ if (dump_trace)
+ perf_event__fprintf_mmap2(event, stdout);
+
+ if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
+ cpumode == PERF_RECORD_MISC_KERNEL) {
+ ret = machine__process_kernel_mmap_event(machine, event);
+ if (ret < 0)
+ goto out_problem;
+ return 0;
+ }
+
+ thread = machine__findnew_thread(machine, event->mmap2.pid,
+ event->mmap2.pid);
+ if (thread == NULL)
+ goto out_problem;
+
+ if (event->header.misc & PERF_RECORD_MISC_MMAP_DATA)
+ type = MAP__VARIABLE;
+ else
+ type = MAP__FUNCTION;
+
+ map = map__new(&machine->user_dsos, event->mmap2.start,
+ event->mmap2.len, event->mmap2.pgoff,
+ event->mmap2.pid, event->mmap2.maj,
+ event->mmap2.min, event->mmap2.ino,
+ event->mmap2.ino_generation,
+ event->mmap2.filename, type);
+
+ if (map == NULL)
+ goto out_problem;
+
+ thread__insert_map(thread, map);
+ return 0;
+
+out_problem:
+ dump_printf("problem processing PERF_RECORD_MMAP2, skipping event.\n");
+ return 0;
+}
+
int machine__process_mmap_event(struct machine *machine, union perf_event *event)
{
u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
@@ -1028,7 +1076,8 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
map = map__new(&machine->user_dsos, event->mmap.start,
event->mmap.len, event->mmap.pgoff,
- event->mmap.pid, event->mmap.filename,
+ event->mmap.pid, 0, 0, 0, 0,
+ event->mmap.filename,
type);
if (map == NULL)
@@ -1101,6 +1150,8 @@ int machine__process_event(struct machine *machine, union perf_event *event)
ret = machine__process_comm_event(machine, event); break;
case PERF_RECORD_MMAP:
ret = machine__process_mmap_event(machine, event); break;
+ case PERF_RECORD_MMAP2:
+ ret = machine__process_mmap2_event(machine, event); break;
case PERF_RECORD_FORK:
ret = machine__process_fork_event(machine, event); break;
case PERF_RECORD_EXIT:
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 0df925b..58a6be1 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -45,6 +45,7 @@ int machine__process_exit_event(struct machine *machine, union perf_event *event
int machine__process_fork_event(struct machine *machine, union perf_event *event);
int machine__process_lost_event(struct machine *machine, union perf_event *event);
int machine__process_mmap_event(struct machine *machine, union perf_event *event);
+int machine__process_mmap2_event(struct machine *machine, union perf_event *event);
int machine__process_event(struct machine *machine, union perf_event *event);
typedef void (*machine__process_t)(struct machine *machine, void *data);
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 9e8304c..4f6680d 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -48,7 +48,8 @@ void map__init(struct map *map, enum map_type type,
}
struct map *map__new(struct list_head *dsos__list, u64 start, u64 len,
- u64 pgoff, u32 pid, char *filename,
+ u64 pgoff, u32 pid, u32 d_maj, u32 d_min, u64 ino,
+ u64 ino_gen, char *filename,
enum map_type type)
{
struct map *map = malloc(sizeof(*map));
@@ -62,6 +63,11 @@ struct map *map__new(struct list_head *dsos__list, u64 start, u64 len,
vdso = is_vdso_map(filename);
no_dso = is_no_dso_memory(filename);
+ map->maj = d_maj;
+ map->min = d_min;
+ map->ino = ino;
+ map->ino_generation = ino_gen;
+
if (anon) {
snprintf(newfilename, sizeof(newfilename), "/tmp/perf-%d.map", pid);
filename = newfilename;
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 2cc93cb..4886ca2 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -36,6 +36,9 @@ struct map {
bool erange_warned;
u32 priv;
u64 pgoff;
+ u32 maj, min; /* only valid for MMAP2 record */
+ u64 ino; /* only valid for MMAP2 record */
+ u64 ino_generation;/* only valid for MMAP2 record */
/* ip -> dso rip */
u64 (*map_ip)(struct map *, u64);
@@ -88,8 +91,9 @@ typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym);
void map__init(struct map *map, enum map_type type,
u64 start, u64 end, u64 pgoff, struct dso *dso);
struct map *map__new(struct list_head *dsos__list, u64 start, u64 len,
- u64 pgoff, u32 pid, char *filename,
- enum map_type type);
+ u64 pgoff, u32 pid, u32 d_maj, u32 d_min, u64 ino,
+ u64 ino_gen,
+ char *filename, enum map_type type);
struct map *map__new2(u64 start, struct dso *dso, enum map_type type);
void map__delete(struct map *map);
struct map *map__clone(struct map *map);
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 1fc0c62..51f5edf 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -351,6 +351,25 @@ static void perf_event__mmap_swap(union perf_event *event,
}
}
+static void perf_event__mmap2_swap(union perf_event *event,
+ bool sample_id_all)
+{
+ event->mmap2.pid = bswap_32(event->mmap2.pid);
+ event->mmap2.tid = bswap_32(event->mmap2.tid);
+ event->mmap2.start = bswap_64(event->mmap2.start);
+ event->mmap2.len = bswap_64(event->mmap2.len);
+ event->mmap2.pgoff = bswap_64(event->mmap2.pgoff);
+ event->mmap2.maj = bswap_32(event->mmap2.maj);
+ event->mmap2.min = bswap_32(event->mmap2.min);
+ event->mmap2.ino = bswap_64(event->mmap2.ino);
+
+ if (sample_id_all) {
+ void *data = &event->mmap2.filename;
+
+ data += PERF_ALIGN(strlen(data) + 1, sizeof(u64));
+ swap_sample_id_all(event, data);
+ }
+}
static void perf_event__task_swap(union perf_event *event, bool sample_id_all)
{
event->fork.pid = bswap_32(event->fork.pid);
@@ -455,6 +474,7 @@ typedef void (*perf_event__swap_op)(union perf_event *event,
static perf_event__swap_op perf_event__swap_ops[] = {
[PERF_RECORD_MMAP] = perf_event__mmap_swap,
+ [PERF_RECORD_MMAP2] = perf_event__mmap2_swap,
[PERF_RECORD_COMM] = perf_event__comm_swap,
[PERF_RECORD_FORK] = perf_event__task_swap,
[PERF_RECORD_EXIT] = perf_event__task_swap,
@@ -504,6 +524,7 @@ static int flush_sample_queue(struct perf_session *s,
u64 limit = os->next_flush;
u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL;
unsigned idx = 0, progress_next = os->nr_samples / 16;
+ bool show_progress = limit == ULLONG_MAX;
int ret;
if (!tool->ordered_samples || !limit)
@@ -526,7 +547,7 @@ static int flush_sample_queue(struct perf_session *s,
os->last_flush = iter->timestamp;
list_del(&iter->list);
list_add(&iter->list, &os->sample_cache);
- if (++idx >= progress_next) {
+ if (show_progress && (++idx >= progress_next)) {
progress_next += os->nr_samples / 16;
ui_progress__update(idx, os->nr_samples,
"Processing time ordered events...");
@@ -850,7 +871,8 @@ static struct machine *
(cpumode == PERF_RECORD_MISC_GUEST_USER))) {
u32 pid;
- if (event->header.type == PERF_RECORD_MMAP)
+ if (event->header.type == PERF_RECORD_MMAP
+ || event->header.type == PERF_RECORD_MMAP2)
pid = event->mmap.pid;
else
pid = sample->pid;
@@ -977,6 +999,8 @@ static int perf_session_deliver_event(struct perf_session *session,
sample, evsel, machine);
case PERF_RECORD_MMAP:
return tool->mmap(tool, event, sample, machine);
+ case PERF_RECORD_MMAP2:
+ return tool->mmap2(tool, event, sample, machine);
case PERF_RECORD_COMM:
return tool->comm(tool, event, sample, machine);
case PERF_RECORD_FORK:
@@ -1619,52 +1643,26 @@ int __perf_session__set_tracepoints_handlers(struct perf_session *session,
const struct perf_evsel_str_handler *assocs,
size_t nr_assocs)
{
- struct perf_evlist *evlist = session->evlist;
- struct event_format *format;
struct perf_evsel *evsel;
- char *tracepoint, *name;
size_t i;
int err;
for (i = 0; i < nr_assocs; i++) {
- err = -ENOMEM;
- tracepoint = strdup(assocs[i].name);
- if (tracepoint == NULL)
- goto out;
-
- err = -ENOENT;
- name = strchr(tracepoint, ':');
- if (name == NULL)
- goto out_free;
-
- *name++ = '\0';
- format = pevent_find_event_by_name(session->pevent,
- tracepoint, name);
- if (format == NULL) {
- /*
- * Adding a handler for an event not in the session,
- * just ignore it.
- */
- goto next;
- }
-
- evsel = perf_evlist__find_tracepoint_by_id(evlist, format->id);
+ /*
+ * Adding a handler for an event not in the session,
+ * just ignore it.
+ */
+ evsel = perf_evlist__find_tracepoint_by_name(session->evlist, assocs[i].name);
if (evsel == NULL)
- goto next;
+ continue;
err = -EEXIST;
if (evsel->handler.func != NULL)
- goto out_free;
+ goto out;
evsel->handler.func = assocs[i].handler;
-next:
- free(tracepoint);
}
err = 0;
out:
return err;
-
-out_free:
- free(tracepoint);
- goto out;
}
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index 62b16b6..4385816 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -29,6 +29,7 @@ struct perf_tool {
event_sample sample,
read;
event_op mmap,
+ mmap2,
comm,
fork,
exit,
On Thu, Sep 12, 2013 at 6:38 AM, Ingo Molnar <[email protected]> wrote:
>
> Various fixes. (The -g perf report lockup you reported is only partially
> addressed, patches that fix the excessive runtime are still being worked
> on.)
So I pulled this and compiled a new version, and I have a new
complaint. The _bug_ probably is not new, but it happened because I
was compiling the tools/perf/ subdirectory while another terminal was
busy doing a "make allmodconfig" test build (hey, sue me, I do a lot
of them during the merge window).
When I compiled "perf" at the same time as doing a big kernel compile,
the kernel compile failed! I got a few odd "No such file or directory"
for temporary object files in the kernel build. That's not nice. Why
does "make" in the perf tools mess up a "make" of the main kernel?
That implies that the perf tools aren't really independent, and they
try to make at least part of the top-level build. Very annoying.
Another annoyance during that make was that "make install" seems to
want to re-make the thing I just built. That's absolutely horrible,
even if I've seen too many broken projects do that. Now, for perf it's
not as horrible as for some (because you can do "make install" as a
normal user), but it's still a pattern that needs to be called out and
needs to die. It's not just that it slows down "make install", it's
also that a normal pattern *should* be that you build things as a
normal user, and do "make install" as root.
Linus
On Thu, Sep 12, 2013 at 11:03 AM, Linus Torvalds
<[email protected]> wrote:
>
> When I compiled "perf" at the same time as doing a big kernel compile,
> the kernel compile failed
Oops. That may actually have been me being a bit *too* eager with a
"make allmodconfig" build. I can't reproduce it, and I'm starting to
suspect that I instead had two kernel compiles going, not one kernel
compile and a tools/perf/ compile.
"Yo Dawg, I heard you like kernel compiles, so I put a kernel
compile in your kernel compile so that you can compile the kernel
while you compile the kernel".
But at least the "make install" problem is repeatable, though.
Linus
Em Thu, Sep 12, 2013 at 11:10:37AM -0700, Linus Torvalds escreveu:
> On Thu, Sep 12, 2013 at 11:03 AM, Linus Torvalds <[email protected]> wrote:
> > When I compiled "perf" at the same time as doing a big kernel compile,
> > the kernel compile failed
> Oops. That may actually have been me being a bit *too* eager with a
> "make allmodconfig" build. I can't reproduce it, and I'm starting to
> suspect that I instead had two kernel compiles going, not one kernel
> compile and a tools/perf/ compile.
> "Yo Dawg, I heard you like kernel compiles, so I put a kernel
> compile in your kernel compile so that you can compile the kernel
> while you compile the kernel".
:-)
> But at least the "make install" problem is repeatable, though.
Well, I just tried it, and the only thing that gets rebuilt are the CHK
environment tests that try to figure out what can be built into perf,
i.e. perl, python, libaudit, etc.
It's something that annoys me as well, but not so much as to make me
figure out how to make those be done only if some source file changed.
But then, if you remove, say, libelf from your system so that you get a
perf tool that uses just /proc/kallsyms, it wouldn't detect it...
Perhaps in that case we should say: want a new build with a different
environment? Do a 'make clean' first.
But if I do it as a normal user and then try installing as root... it
behaves like you described, duh :-\
Because of this:
* new build flags or prefix
Will check...
- Arnaldo
On Thu, Sep 12, 2013 at 11:10 AM, Linus Torvalds
<[email protected]> wrote:
>
> But at least the "make install" problem is repeatable, though.
And now this new problem is repeatable too:
# On a fully built kernel tree
perf record -g -e cycles:pp make -j
results in
[ perf record: Woken up 27 times to write data ]
[ perf record: Captured and wrote 7.980 MB perf.data (~348659 samples) ]
0x1b4e0 [0]: failed to process type: -1970637019
where that number changes randomly, ie I get
0x28dc58 [0]: failed to process type: 99257493
0x4100 [0]: failed to process type: -1144359783
0x29050 [0]: failed to process type: -972156963
..
looks like perhaps some uninitialized variable somewhere?
Recording performance profiles of other (simpler?) loads still seems
to work. So it's something about that "make -j" that makes it crap out
(note that the tree is fully built, so not a lot actually gets *done*,
and the thing only takes a few seconds)
Linus
Em Thu, Sep 12, 2013 at 03:43:41PM -0300, Arnaldo Carvalho de Melo escreveu:
> But if I do it as a normal user and then try installing as root... it
> behaves like you described, duh :-\
>
> Because of this:
>
> * new build flags or prefix
>
> Will check...
This is the cause:
### Detect prefix changes
TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):\
$(bindir_SQ):$(perfexecdir_SQ):$(template_dir_SQ):$(prefix_SQ)
$(OUTPUT)PERF-CFLAGS: .FORCE-PERF-CFLAGS
@FLAGS='$(TRACK_CFLAGS)'; \
if test x"$$FLAGS" != x"`cat $(OUTPUT)PERF-CFLAGS 2>/dev/null`" ; then \
echo 1>&2 " * new build flags or prefix"; \
echo "$$FLAGS" >$(OUTPUT)PERF-CFLAGS; \
fi
That "prefix" thing, i.e.:
$(bindir_SQ):$(perfexecdir_SQ):$(template_dir_SQ):$(prefix_SQ)
As user it is:
/home/acme/bin:libexec/perf-core:share/perf-core/templates:/home/acme
while as root it becomes:
/root/bin:libexec/perf-core:share/perf-core/templates:/root
And that causes the whole thing to be rebuilt when going from 'make' as normal
user to 'make install' as root.
All the other things in that TRACK_CFLAGS part are equal.
We probably got all this from the 'git' makefile, checking how it is done
there these days...
- Arnaldo
On Thu, Sep 12, 2013 at 12:12 PM, Arnaldo Carvalho de Melo
<[email protected]> wrote:
>
> We probably got all this from the 'git' makefile, checking how it is done
> there these days...
I suspect it's the same.
The thing I was really complaining about is how slow "make install"
is, since it seems to redo most everything I already did...
Linus
* Arnaldo Carvalho de Melo <[email protected]> wrote:
> Em Thu, Sep 12, 2013 at 11:10:37AM -0700, Linus Torvalds escreveu:
> > On Thu, Sep 12, 2013 at 11:03 AM, Linus Torvalds <[email protected]> wrote:
> > > When I compiled "perf" at the same time as doing a big kernel compile,
> > > the kernel compile failed
>
> > Oops. That may actually have been me being a bit *too* eager with a
> > "make allmodconfig" build. I can't reproduce it, and I'm starting to
> > suspect that I instead had two kernel compiles going, not one kernel
> > compile and a tools/perf/ compile.
>
> > "Yo Dawg, I heard you like kernel compiles, so I put a kernel
> > compile in your kernel compile so that you can compile the kernel
> > while you compile the kernel".
>
> :-)
>
> > But at least the "make install" problem is repeatable, though.
>
> Well, I just tried it, and the only thing that gets rebuilt are the CHK
> environment tests that try to figure out what can be built into perf,
> i.e. perl, python, libaudit, etc.
>
> Its something that annoys me as well, but not so much as to make me
> figure out how to make those be done only if some source file changed.
>
> But then, if you remove, say, libelf from your system so that you get a
> perf tool that uses just /proc/kallsyms, it wouldn't detect it...
I don't think package removal is a particularly common usecase.
> Perhaps in that case we should say: want a new build with a different
> environment? Do a 'make clean' first.
Exactly. The most common pattern is:
make
# see warnings about missing dependencies
install missing packages
make
# no warnings, happy camper
Downgrades, package removals almost never happen in real life, let alone
in typical build flows.
So in the simplest approximation, if we detected just the best-case: 'all
libraries are present, we can do a full build' case and cached that fact
across builds (and cleared the cached flag on 'make clean'), that would
help speeding up the main usecase already.
But a cached flag per _successful_ config/feature-tests.mak testcase would
work well too. I.e. only repeat checks that failed in the past. Once it
succeeds there's no need to re-check.
Independent of all this is the bug of repeat checks Linus noticed,
that's indeed annoying and should be fixed separately of any feature test
cache.
Thanks,
Ingo
On 9/12/13 11:43 AM, Arnaldo Carvalho de Melo wrote:
> Its something that annoys me as well, but not so much as to make me
> figure out how to make those be done only if some source file changed.
Jiri and I have both taken stabs at a config-based build rather than
probing. Just need to finish it.
David
Em Thu, Sep 12, 2013 at 12:58:28PM -0700, David Ahern escreveu:
> On 9/12/13 11:43 AM, Arnaldo Carvalho de Melo wrote:
> >Its something that annoys me as well, but not so much as to make me
> >figure out how to make those be done only if some source file changed.
>
> Jiri and I have both taken stabs at a config-based build rather than
> probing. Just need to finish it.
But even then it would be nice to have a:
make autoconfig
That is what we have now, but only then we would try to do all that
probing. Caching it somehow, as Ingo suggested, would get the best of
both worlds.
- Arnaldo
* Linus Torvalds <[email protected]> wrote:
> On Thu, Sep 12, 2013 at 11:03 AM, Linus Torvalds
> <[email protected]> wrote:
> >
> > When I compiled "perf" at the same time as doing a big kernel compile,
> > the kernel compile failed
>
> Oops. That may actually have been me being a bit *too* eager with a
> "make allmodconfig" build. I can't reproduce it, and I'm starting to
> suspect that I instead had two kernel compiles going, not one kernel
> compile and a tools/perf/ compile.
Btw., building perf in parallel to an ongoing kernel compile is something
I do almost daily, and I never saw problems due to that. A perf build is
not supposed to cause any side effect on the kernel build and vice versa.
So in general it should just work.
> "Yo Dawg, I heard you like kernel compiles, so I put a kernel compile
> in your kernel compile so that you can compile the kernel while you
> compile the kernel".
Heh, accidental dual kernel builds within the same kernel tree is
something I have a _lot_ of involuntary experience with, and I usually
recognize the error message patterns straight away ;-)
As a kernel hacker one learns useful skills all the time!
Thanks,
Ingo
* David Ahern <[email protected]> wrote:
> On 9/12/13 11:43 AM, Arnaldo Carvalho de Melo wrote:
> > Its something that annoys me as well, but not so much as to make me
> > figure out how to make those be done only if some source file changed.
>
> Jiri and I have both taken stabs at a config-based build rather than
> probing. Just need to finish it.
Mind outlining the approach you are thinking about?
Firstly, please don't even think about autotools. (Just forget it exists.)
Secondly, the way perf tries to build by auto-detecting the build
environment and auto-disabling bits it cannot build just yet is pretty
powerful. The core bits will build on just about any system, and our
fallbacks are really good.
The result is that perf will build on just about any random system,
without the user having to install any dependency. It would be really sad
to lose that aspect.
What I think would work best is what I outlined in the previous mail: to
cache successful feature test results and only re-do unsuccessful tests:
those are the ones that are expected to turn into successful tests in the
future, once the missing dependencies are installed.
Since most tests succeed even on a sparsely installed system, this trick
alone will speed up the checks big time.
Furthermore, this method would encourage people to install the
dependencies - and perf developers, who do many repeat builds after
trivial one-file changes, will typically have all dependencies installed
anyway, so for them such a caching feature would result in totally cached
feature tests and very fast build times.
Thanks,
Ingo
* Arnaldo Carvalho de Melo <[email protected]> wrote:
> Em Thu, Sep 12, 2013 at 12:58:28PM -0700, David Ahern escreveu:
> > On 9/12/13 11:43 AM, Arnaldo Carvalho de Melo wrote:
> >
> > > Its something that annoys me as well, but not so much as to make me
> > > figure out how to make those be done only if some source file
> > > changed.
> >
> > Jiri and I have both taken stabs at a config-based build rather than
> > probing. Just need to finish it.
>
> But even then it would be nice to have a:
>
> make autoconfig
>
> That is what we have now, but only then we would try to do all that
> probing. Caching it somehow, as Ingo suggested, would get the best of
> both worlds.
Caching should be relatively straightforward: I'd suggest splitting up all
the feature tests currently present in tools/perf/config/feature-tests.mak
into separate .c files which build into working binaries.
Instead of having 20 testcases crammed into a single Makefile:
comet:~/tip/tools/perf> git grep -w main config/feature-tests.mak
config/feature-tests.mak:int main(void)
config/feature-tests.mak:int main(void)
config/feature-tests.mak:int main(void)
config/feature-tests.mak:int main(void)
config/feature-tests.mak:int main(void)
config/feature-tests.mak:int main(void)
config/feature-tests.mak:int main(void)
config/feature-tests.mak:int main(int argc, char *argv[])
config/feature-tests.mak:int main(void)
config/feature-tests.mak:int main(void)
config/feature-tests.mak:int main(void)
config/feature-tests.mak:int main(void)
config/feature-tests.mak:int main(void)
config/feature-tests.mak:int main(void)
config/feature-tests.mak:int main(void)
config/feature-tests.mak:int main(void)
config/feature-tests.mak:int main(void)
config/feature-tests.mak:int main(void)
config/feature-tests.mak:int main(void)
config/feature-tests.mak:int main(void)
Why not put those into 20x standalone .c files and try to build them as an
explicit make command attempt from within the main Makefile? If the build
fails then the feature flag is set to off, if it succeeds it's defined.
That makes feature tests generally easier to debug, easier to understand
and generally easier to extend as well.
'Feature test binaries' that are already built will be 'cached'
automatically, and the result of their build attempt will be a quick
success.
This is relatively simple, well-known, powerful and I see no reason why it
wouldn't work. No separate config language or special files are needed -
just .c files, binaries and a Makefile.
'make clean' would get rid of the binaries.
[ It could even be made to work against package removal if we really
wanted to: if there's a common, known include file related to the
feature (such as /usr/include/libaudit.h), then that include file
could be added to the feature-test binary's list of dependencies. If the
package is removed and the include file goes away, then the dependency
breaks automatically and the binary is 'rebuilt', auto-detecting the
failed dependency. ]
Thanks,
Ingo
* Linus Torvalds <[email protected]> wrote:
> On Thu, Sep 12, 2013 at 11:10 AM, Linus Torvalds
> <[email protected]> wrote:
> >
> > But at least the "make install" problem is repeatable, though.
>
> And now this new problem is repeatable too:
>
> # On a fully built kernel tree
> perf record -g -e cycles:pp make -j
>
> results in
>
> [ perf record: Woken up 27 times to write data ]
> [ perf record: Captured and wrote 7.980 MB perf.data (~348659 samples) ]
> 0x1b4e0 [0]: failed to process type: -1970637019
>
> where that number changes randomly, ie I get
>
> 0x28dc58 [0]: failed to process type: 99257493
> 0x4100 [0]: failed to process type: -1144359783
> 0x29050 [0]: failed to process type: -972156963
> ..
>
> looks like perhaps some uninitialized variable somewhere?
>
> Recording performance profiles of other (simpler?) loads still seems to
> work. So it's something about that "make -j" that makes it crap out
> (note that the tree is fully built, so not a lot actually gets *done*,
> and the thing only takes a few seconds)
Hm, just to make sure, are you running a (very fresh) kernel that has this
fix included:
d008d5258e9c perf: Fix up MMAP2 buffer space reservation
?
Thanks,
Ingo
On Thu, Sep 12, 2013 at 1:33 PM, Ingo Molnar <[email protected]> wrote:
>
> Hm, just to make sure, are you running a (very fresh) kernel that has this
> fix included:
>
> d008d5258e9c perf: Fix up MMAP2 buffer space reservation
I've rebooted about five times since that report, so I'm not sure..
[ Short time passes ]
Hmm. That "reproducible" error is no longer reproducible for me on
current git (68f0d9d92e54). So if you fixed something recently, it's
possible I was running a pre-perf-pull kernel and an after-perf-pull
"perf" binary.
Linus
Em Thu, Sep 12, 2013 at 10:18:55PM +0200, Ingo Molnar escreveu:
> * David Ahern <[email protected]> wrote:
>
> > On 9/12/13 11:43 AM, Arnaldo Carvalho de Melo wrote:
>
> > > Its something that annoys me as well, but not so much as to make me
> > > figure out how to make those be done only if some source file changed.
> >
> > Jiri and I have both taken stabs at a config-based build rather than
> > probing. Just need to finish it.
>
> Mind outlining the approach you are thinking about?
>
> Firstly, please don't even think about autotools. (Just forget it exists.)
hehe, no, that wasn't considered.
> Secondly, the way perf tries to build by auto-detecting the build
> environment and auto-disabling bits it cannot build just yet is pretty
> powerful. The core bits will build on just about any system, and our
> fallbacks are really good.
That would remain as:
make -C tools/perf autoconfig
> The result is that perf will build on just about any random system,
> without the user having to install any dependency. It would be really sad
> to lose that aspect.
we will not
> What I think would work best is what I outlined in the previous mail: to
> cache successful feature test results and only re-do unsuccessful tests:
> those are the ones that are expected to turn into successful tests in the
> future, once the missing dependencies are installed.
that is an optimization to 'make autoconfig', i.e. what we have now,
improved.
> Since most tests succeed even on a sparsely installed system, this trick
> alone will speed up the checks big time.
>
> Furthermore, this method would encourage people to install the
> dependencies - and perf developers, who do many repeat builds after
> trivial one-file changes, will typically have all dependencies installed
> anyway, so for them such a caching feature would result in totally cached
> feature tests and very fast build times.
What he mentioned is the multiple attempts at doing:
make -C tools/perf menuconfig
and use kbuild to allow one to select what he/she wants to build, i.e.
using the kernel config system in perf.
In that case the feature checks would be triggered only for the features
selected, not for all perf currently selectable features.
- Arnaldo
* Ingo Molnar <[email protected]> wrote:
> Instead of having 20 testcases crammed into a single Makefile:
>
> comet:~/tip/tools/perf> git grep -w main config/feature-tests.mak
> config/feature-tests.mak:int main(void)
> config/feature-tests.mak:int main(void)
> config/feature-tests.mak:int main(void)
> config/feature-tests.mak:int main(void)
> config/feature-tests.mak:int main(void)
> config/feature-tests.mak:int main(void)
> config/feature-tests.mak:int main(void)
> config/feature-tests.mak:int main(int argc, char *argv[])
> config/feature-tests.mak:int main(void)
> config/feature-tests.mak:int main(void)
> config/feature-tests.mak:int main(void)
> config/feature-tests.mak:int main(void)
> config/feature-tests.mak:int main(void)
> config/feature-tests.mak:int main(void)
> config/feature-tests.mak:int main(void)
> config/feature-tests.mak:int main(void)
> config/feature-tests.mak:int main(void)
> config/feature-tests.mak:int main(void)
> config/feature-tests.mak:int main(void)
> config/feature-tests.mak:int main(void)
>
> Why not put those into 20x standalone .c files and try to build them as
> an explicit make command attempt from within the main Makefile? If the
> build fails then the feature flag is set to off, if it succeeds it's
> defined.
>
> That makes feature tests generally easier to debug, easier to understand
> and generally easier to extend as well.
>
> 'Feature test binaries' that are already built will be 'cached'
> automatically, and the result of their build attempt will be a quick
> success.
A successful 'make' attempt of a target file within a separate, simple
Makefile is very, very fast:
comet:~/tip/tools/power/x86/turbostat> perf stat --null --sync --repeat 10
make turbostat
make: `turbostat' is up to date.
make: `turbostat' is up to date.
make: `turbostat' is up to date.
make: `turbostat' is up to date.
make: `turbostat' is up to date.
make: `turbostat' is up to date.
make: `turbostat' is up to date.
make: `turbostat' is up to date.
make: `turbostat' is up to date.
make: `turbostat' is up to date.
Performance counter stats for 'make turbostat' (10 runs):
0.002146182 seconds time elapsed ( +- 0.35% )
That's 2 msecs only. 20 such tests will take less than 50 msecs in the
'cached' case.
Thanks,
Ingo
* Arnaldo Carvalho de Melo <[email protected]> wrote:
> Em Thu, Sep 12, 2013 at 10:18:55PM +0200, Ingo Molnar escreveu:
> > * David Ahern <[email protected]> wrote:
> >
> > > On 9/12/13 11:43 AM, Arnaldo Carvalho de Melo wrote:
> >
> > > > Its something that annoys me as well, but not so much as to make me
> > > > figure out how to make those be done only if some source file changed.
> > >
> > > Jiri and I have both taken stabs at a config-based build rather than
> > > probing. Just need to finish it.
> >
> > Mind outlining the approach you are thinking about?
> >
> > Firstly, please don't even think about autotools. (Just forget it exists.)
>
> hehe, no, that wasn't considered.
/phew! :-)
> > Secondly, the way perf tries to build by auto-detecting the build
> > environment and auto-disabling bits it cannot build just yet is pretty
> > powerful. The core bits will build on just about any system, and our
> > fallbacks are really good.
>
> That would remain as:
>
> make -C tools/perf autoconfig
>
> > The result is that perf will build on just about any random system,
> > without the user having to install any dependency. It would be really
> > sad to lose that aspect.
>
> we will not
But it would be nice to keep building as simple as 'make'.
So I don't think splitting out the feature tests into a separate pass, to
be done manually by the user, is a step forward.
Speeding them up by caching their results, while cleaning up the
presentation of the testcases, on the other hand, would be a (big!) step
forward.
Thanks,
Ingo
* Linus Torvalds <[email protected]> wrote:
> On Thu, Sep 12, 2013 at 1:33 PM, Ingo Molnar <[email protected]> wrote:
> >
> > Hm, just to make sure, are you running a (very fresh) kernel that has this
> > fix included:
> >
> > d008d5258e9c perf: Fix up MMAP2 buffer space reservation
>
> I've rebooted about five times since that report, so I'm not sure..
>
> [ Short time passes ]
>
> Hmm. That "reproducible" error is no longer reproducible for me on
> current git (68f0d9d92e54). So if you fixed something recently, it's
> possible I was running a pre-perf-pull kernel and an after-perf-pull
> "perf" binary.
Yeah, so I think prior to merging d008d5258e9c you'd see those artifacts with
new, MMAP2-aware perf tooling. Both older kernels would work fine with any
version of the tooling, and old tooling would work fine. Only new tooling
used on a kernel within the breakage window would be exposed to this.
I think the window of breakage is relatively small (you managed to hit
it), but should we add a quirk for this?
Thanks,
Ingo
On Thu, Sep 12, 2013 at 1:49 PM, Ingo Molnar <[email protected]> wrote:
>
>
> I think the window of breakage is relatively small (you managed to hit
> it), but should we add a quirk for this?
Was the breakage entirely inside the merge window? ie no released kernels?
If so, just ignore it.
Linus
* Linus Torvalds <[email protected]> wrote:
> On Thu, Sep 12, 2013 at 1:49 PM, Ingo Molnar <[email protected]> wrote:
> >
> >
> > I think the window of breakage is relatively small (you managed to hit
> > it), but should we add a quirk for this?
>
> Was the breakage entirely inside the merge window? ie no released
> kernels?
Absolutely! Starts at commit 0d99b7087324, stops at commit d008d5258e9c,
all inside the merge window.
> If so, just ignore it.
Ok, great!
Thanks,
Ingo
On 9/12/13 1:46 PM, Ingo Molnar wrote:
>>> Mind outlining the approach you are thinking about?
>>>
>>> Firstly, please don't even think about autotools. (Just forget it exists.)
>>
>> hehe, no, that wasn't considered.
>
> /phew! :-)
kconf approach of course:
https://lkml.org/lkml/2013/4/1/600
(minus the manual steps in that RFC).
David
* David Ahern <[email protected]> wrote:
> On 9/12/13 1:46 PM, Ingo Molnar wrote:
> >>>Mind outlining the approach you are thinking about?
> >>>
> >>>Firstly, please don't even think about autotools. (Just forget it exists.)
> >>
> >>hehe, no, that wasn't considered.
> >
> >/phew! :-)
>
> kconf approach of course:
> https://lkml.org/lkml/2013/4/1/600
> (minus the manual steps in that RFC).
I'm not sure what the end stage is where you'd like to arrive, but I don't
think that forcing a separate configuration pass is an improvement :-/
By default a simple 'make' should build perf to the maximum extent
possible, with no other input required from the user - with warnings
displayed as package install suggestions.
This:
Enable newt-based TUI (NEWT) [N/y] (NEW) y
Enable GTK-based UI (GTK2) [N/y] (NEW) n
Enable support for Bionic (e.g., Android platform) (BIONIC) [N/y] (NEW)
Development support for libc is available - glibc or bionic (LIBC) [N/y]
(NEW) y
Enable support for libelf (LIBELF) [N/y] (NEW) y
Enable support for libunwind (LIBUNWIND) [N/y] (NEW) y
Enable support for dwarf (DWARF) [N/y] (NEW) y
Enable support for demangle (DEMANGLE) [N/y] (NEW) y
Enable support for perl scripting engine (LIBPERL) [N/y] (NEW) y
Enable support for python scripting engine (LIBPYTHON) [N/y] (NEW) y
Enable support for libaudit (LIBAUDIT) [N/y/?] (NEW) y
Enable support for libnuma (LIBNUMA) [N/y/?] (NEW) y
Enable support for stack backtrace debugging (BACKTRACE) [N/y] (NEW) y
would be useful only as long as each listed option is actually
_buildable_. I.e. it should not be possible for the user to configure perf
in a way that makes the build fail.
I.e. this should go on top of a feature detection logic, allowing further
customization, for features that the user might want to turn off.
Thanks,
Ingo
On 9/12/13 2:18 PM, Ingo Molnar wrote:
>> kconf approach of course:
>> https://lkml.org/lkml/2013/4/1/600
>> (minus the manual steps in that RFC).
>
> I'm not sure what the end stage is where you'd like to arrive, but I don't
> think that forcing a separate configuration pass is an improvement :-/
once I have a working config -- say one for a target (minimal build) and
one for analysis (more but not all features -- eg., no gtk) I never see
auto-probing again. At best some new feature (config) comes along and
the config has to be updated again, but kconf only inquires about new
features.
>
> By default a simple 'make' should build perf to the maximum extent
> possible, with no other input required from the user - with warnings
> displayed as package install suggestions.
By default there is no config. Autoprobing generates a first one or a
user can specify a defconfig.
David
* David Ahern <[email protected]> wrote:
> > By default a simple 'make' should build perf to the maximum extent
> > possible, with no other input required from the user - with warnings
> > displayed as package install suggestions.
>
> By default there is no config. Autoprobing generates a first one or a
> user can specify a defconfig.
This could work if there are not two but three states for individual
features:
- autoprobe
- on
- off
and if the build, once a system feature has been probed successfully,
automatically turned 'autoprobe' entries into 'on'.
That would give us the best of all worlds - autodetection, configurability
and caching:
- initial user types 'make' and gets a .config that has almost all
entries 'on', a few 'autoprobe'.
- once the user installs a dependency, the corresponding .config entry
turns into 'on'.
- the regular user or developers would have libraries that turn all
entries in the .config to 'on'.
- if a user is genuinely uninterested in a feature, he can mark it 'off',
which would then stay off permanently. This could also be used by
embedded/specialized builds.
- other specialized users, like distro builds, could use a .config with
all entries 'on' and could enforce the presence of all dependencies for
a successful build. [We could add 'make allyesconfig' to help that.]
Thanks,
Ingo
Hi,
On 13 September 2013 07:09, Ingo Molnar <[email protected]> wrote:
>
> * David Ahern <[email protected]> wrote:
>
>> > By default a simple 'make' should build perf to the maximum extent
>> > possible, with no other input required from the user - with warnings
>> > displayed as package install suggestions.
>>
>> By default there is no config. Autoprobing generates a first one or a
>> user can specify a defconfig.
>
> This could work if there's not two but three states for individual
> features:
>
> - autoprobe
> - on
> - off
>
> and if autoprobe, if a system feature has been probed successfully,
> automatically turned 'autoprobe' entries into 'on'.
>
> That would give us the best of all worlds - autodetection, configurability
> and caching:
>
> - initial user types 'make' and gets a .config that has almost all
> entries 'on', a few 'autoprobe'.
>
> - once the user installs a dependency, the corresponding .config entry
> turns into 'on'.
>
> - the regular user or developers would have libraries that turn all
> entries in the .config to 'on'.
>
> - if a user is genuinely uninterested in a feature, he can mark it 'off',
> which would then stay off permanently. This could also be used by
> embedded/specialized builds.
>
> - other specialized users, like distro builds, could use a .config with
> all entries 'on' and could enforce the presence of all dependencies for
> a successful build. [We could add 'make allyesconfig' to help that.]
Is there a way to detect the presence of a dependency and _also_ check
its version? Some new features depend on a recent version of a
library, e.g. dwarf unwinding depends on libunwind >= 1.1 (cf.
http://www.spinics.net/lists/kernel/msg1598951.html).
Thanks,
Jean
>
> Thanks,
>
> Ingo
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
* Jean Pihet <[email protected]> wrote:
> Hi,
>
> On 13 September 2013 07:09, Ingo Molnar <[email protected]> wrote:
> >
> > * David Ahern <[email protected]> wrote:
> >
> >> > By default a simple 'make' should build perf to the maximum extent
> >> > possible, with no other input required from the user - with warnings
> >> > displayed as package install suggestions.
> >>
> >> By default there is no config. Autoprobing generates a first one or a
> >> user can specify a defconfig.
> >
> > This could work if there's not two but three states for individual
> > features:
> >
> > - autoprobe
> > - on
> > - off
> >
> > and if autoprobe, if a system feature has been probed successfully,
> > automatically turned 'autoprobe' entries into 'on'.
> >
> > That would give us the best of all worlds - autodetection, configurability
> > and caching:
> >
> > - initial user types 'make' and gets a .config that has almost all
> > entries 'on', a few 'autoprobe'.
> >
> > - once the user installs a dependency, the corresponding .config entry
> > turns into 'on'.
> >
> > - the regular user or developers would have libraries that turn all
> > entries in the .config to 'on'.
> >
> > - if a user is genuinely uninterested in a feature, he can mark it 'off',
> > which would then stay off permanently. This could also be used by
> > embedded/specialized builds.
> >
> > - other specialized users, like distro builds, could use a .config with
> > all entries 'on' and could enforce the presence of all dependencies for
> > a successful build. [We could add 'make allyesconfig' to help that.]
>
> Is there a way to detect the presence of a dependency and _also_ check
> its version? Some new features are depending on a recent version of a
> library, e.g. dwarf unwinding depends on libunwind >= 1.1 (cf.
> http://www.spinics.net/lists/kernel/msg1598951.html).
Yeah, see the testcases in tools/perf/config/feature-tests.mak, they
typically include the latest library API usages, which will fail on older
versions.
That kind of 'does it actually work?' test is a lot more robust than
explicit version checks, and combined with caching it should be fast and
parallelizable as well. (One of the problems of the current simple
implementation of the feature tests is that they are 20 serial tests with
no parallelization.)
Thanks,
Ingo
On 13 September 2013 11:45, Ingo Molnar <[email protected]> wrote:
>
> * Jean Pihet <[email protected]> wrote:
>
>> Hi,
>>
>> On 13 September 2013 07:09, Ingo Molnar <[email protected]> wrote:
>> >
>> > * David Ahern <[email protected]> wrote:
>> >
>> >> > By default a simple 'make' should build perf to the maximum extent
>> >> > possible, with no other input required from the user - with warnings
>> >> > displayed as package install suggestions.
>> >>
>> >> By default there is no config. Autoprobing generates a first one or a
>> >> user can specify a defconfig.
>> >
>> > This could work if there's not two but three states for individual
>> > features:
>> >
>> > - autoprobe
>> > - on
>> > - off
>> >
>> > and if autoprobe, if a system feature has been probed successfully,
>> > automatically turned 'autoprobe' entries into 'on'.
>> >
>> > That would give us the best of all worlds - autodetection, configurability
>> > and caching:
>> >
>> > - initial user types 'make' and gets a .config that has almost all
>> > entries 'on', a few 'autoprobe'.
>> >
>> > - once the user installs a dependency, the corresponding .config entry
>> > turns into 'on'.
>> >
>> > - the regular user or developers would have libraries that turn all
>> > entries in the .config to 'on'.
>> >
>> > - if a user is genuinely uninterested in a feature, he can mark it 'off',
>> > which would then stay off permanently. This could also be used by
>> > embedded/specialized builds.
>> >
>> > - other specialized users, like distro builds, could use a .config with
>> > all entries 'on' and could enforce the presence of all dependencies for
>> > a successful build. [We could add 'make allyesconfig' to help that.]
>>
>> Is there a way to detect the presence of a dependency and _also_ check
>> its version? Some new features are depending on a recent version of a
>> library, e.g. dwarf unwinding depends on libunwind >= 1.1 (cf.
>> http://www.spinics.net/lists/kernel/msg1598951.html).
>
> Yeah, see the testcases in tools/perf/config/feature-tests.mak, they
> typically include the latest library API usages, which will fail on older
> versions.
Ok!
I just sent a patch to feature-tests.mak for the newly added dwarf
unwinding feature ('perf tools: Check libunwind for availability of
dwarf parsing feature').
Thanks,
Jean
>
> That kind of 'does it actually work?' test is a lot more robust than
> explicit version checks, and combined with caching it should be fast and
> parallelizable as well. (One of the problems of the current simple
> implementation of the feature tests is that they are 20 serial tests with
> no parallelization.)
>
> Thanks,
>
> Ingo
So, the discussion died down a bit, so just to demonstrate how feature
tests should be done IMO, here's a quick hack to perf that adds a new
'make feature-check' test-target and factors out 4 standalone feature
tests:
$ make feature-check
...
Testing features:
dwarf support: disabled
ELF support: enabled
glibc support: enabled
Bionic support: disabled
Repeat invocations only rebuild the testcases that failed. If no testcase
fails then nothing is rebuilt and the features check runs very fast.
Even in the worst-case when most testcases are rebuilt there's a big
improvement in runtime due to building the testcases in parallel.
NOTE: the new testcases are not fed back into perf's
NO_DWARF/NO_LIBELF/etc. flag hierarchy yet, the patch only demonstrates
that this method is a viable and fast method that solves most of our
current problems in this area.
Other advantages over the current config/feature-tests.mak method:
- The individual testcases can be built/tested in the feature-checks/
directory as well, individually
- The testcases are simple, plain, standalone C files, unlike
config/feature-tests.mak which suffers from escaping complications.
'make clean' in feature-checks/ gets rid of all testcase binaries.
Thanks,
Ingo
---------------------->
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 3a0ff7f..e6398e7 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -816,3 +816,17 @@ clean: $(LIBTRACEEVENT)-clean $(LIBLK)-clean
.PHONY: all install clean strip $(LIBTRACEEVENT) $(LIBLK)
.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
.PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS
+
+#
+# Build the feature check binaries in parallel, ignore errors, ignore return value and suppress output:
+#
+feature-check-build:
+ @echo "Testing features:"
+ @-make -i -j -C feature-checks >/dev/null 2>&1
+
+feature-check: feature-check-build
+ @echo -n "dwarf support: "; [ -f feature-checks/test-dwarf ] && echo enabled || echo disabled
+ @echo -n "ELF support: "; [ -f feature-checks/test-libelf ] && echo enabled || echo disabled
+ @echo -n "glibc support: "; [ -f feature-checks/test-glibc ] && echo enabled || echo disabled
+ @echo -n "Bionic support: "; [ -f feature-checks/test-bionic ] && echo enabled || echo disabled
+
diff --git a/tools/perf/feature-checks/Makefile b/tools/perf/feature-checks/Makefile
new file mode 100644
index 0000000..dea10c8
--- /dev/null
+++ b/tools/perf/feature-checks/Makefile
@@ -0,0 +1,26 @@
+
+FILES=test-hello test-dwarf test-libelf test-glibc test-bionic
+
+all: $(FILES)
+
+###############################
+
+test-hello: test-hello.c
+ $(CC) -o $@ $@.c
+
+test-dwarf: test-dwarf.c
+ $(CC) -o $@ $@.c -ldw
+
+test-libelf: test-libelf.c
+ $(CC) -o $@ $@.c -lelf
+
+test-glibc: test-glibc.c
+ $(CC) -o $@ $@.c
+
+test-bionic: test-bionic.c
+ $(CC) -o $@ $@.c
+
+###############################
+
+clean:
+ rm -f $(FILES)
diff --git a/tools/perf/feature-checks/test-bionic.c b/tools/perf/feature-checks/test-bionic.c
new file mode 100644
index 0000000..eac24e9
--- /dev/null
+++ b/tools/perf/feature-checks/test-bionic.c
@@ -0,0 +1,6 @@
+#include <android/api-level.h>
+
+int main(void)
+{
+ return __ANDROID_API__;
+}
diff --git a/tools/perf/feature-checks/test-dwarf.c b/tools/perf/feature-checks/test-dwarf.c
new file mode 100644
index 0000000..783dfcd
--- /dev/null
+++ b/tools/perf/feature-checks/test-dwarf.c
@@ -0,0 +1,9 @@
+#include <dwarf.h>
+#include <elfutils/libdw.h>
+#include <elfutils/version.h>
+
+int main(void)
+{
+ Dwarf *dbg = dwarf_begin(0, DWARF_C_READ);
+ return (long)dbg;
+}
diff --git a/tools/perf/feature-checks/test-glibc.c b/tools/perf/feature-checks/test-glibc.c
new file mode 100644
index 0000000..13c66a5
--- /dev/null
+++ b/tools/perf/feature-checks/test-glibc.c
@@ -0,0 +1,8 @@
+#include <gnu/libc-version.h>
+
+int main(void)
+{
+ const char *version = gnu_get_libc_version();
+ return (long)version;
+}
+
diff --git a/tools/perf/feature-checks/test-hello b/tools/perf/feature-checks/test-hello
new file mode 100755
index 0000000..6e9a668
Binary files /dev/null and b/tools/perf/feature-checks/test-hello differ
diff --git a/tools/perf/feature-checks/test-hello.c b/tools/perf/feature-checks/test-hello.c
new file mode 100644
index 0000000..c9f398d
--- /dev/null
+++ b/tools/perf/feature-checks/test-hello.c
@@ -0,0 +1,6 @@
+#include <stdio.h>
+
+int main(void)
+{
+ return puts("hi");
+}
diff --git a/tools/perf/feature-checks/test-libelf b/tools/perf/feature-checks/test-libelf
new file mode 100755
index 0000000..bafde82
Binary files /dev/null and b/tools/perf/feature-checks/test-libelf differ
diff --git a/tools/perf/feature-checks/test-libelf.c b/tools/perf/feature-checks/test-libelf.c
new file mode 100644
index 0000000..1a08f97
--- /dev/null
+++ b/tools/perf/feature-checks/test-libelf.c
@@ -0,0 +1,7 @@
+#include <libelf.h>
+
+int main(void)
+{
+ Elf *elf = elf_begin(0, ELF_C_READ, 0);
+ return (long)elf;
+}
This series (with combo patch attached) implements (much) faster
perf-tools feature-auto-detection.
I used 3 tricks to implement feature auto-dependencies and to speed up
feature detection:
- standalone Makefile in config/feature-checks/ built in parallel
- split-out standalone .c files in config/feature-checks/*.c
- used GCC's auto-dependency generation feature (-MD) to track the
effects of system library addition/removal.
Before the changes a fully cached re-build of an already built tree took
2.6 seconds:
$ perf stat --null --sync --repeat 3 -a make Makefile
2.669467209 seconds time elapsed ( +- 0.07% )
After the changes it takes only 0.6 seconds:
0.599161560 seconds time elapsed ( +- 0.25% )
So the empty build got 4.4x faster.
The 'cache cold' full rebuild got faster as well:
$ perf stat --null --sync --pre 'make clean; make -C config/feature-checks/ clean' --repeat 3 -a make Makefile
before: 2.671373213 seconds time elapsed ( +- 0.12% )
after: 0.959772417 seconds time elapsed ( +- 0.22% )
That's 2.7x faster.
The series also implements new feature detection output, which looks like
this:
Auto-detecting system features:
... stackprotector-all: [ on ]
... volatile-register-var: [ on ]
... fortify-source: [ on ]
... libelf: [ on ]
... libelf-mmap: [ on ]
... glibc: [ on ]
... dwarf: [ on ]
... libelf-getphdrnum: [ on ]
... libunwind: [ on ]
... libaudit: [ on ]
... libslang: [ on ]
... gtk2: [ on ]
... gtk2-infobar: [ on ]
... libperl: [ OFF ]
... libpython: [ on ]
... libpython-version: [ on ]
... libbfd: [ on ]
... strlcpy: [ OFF ]
... on-exit: [ on ]
... backtrace: [ on ]
... libnuma: [ on ]
This can be used to get an overview of the feature detection status. Users
will want to maximize the number of 'on' entries.
An 'on' feature is 'cached' in that its feature-check binary is present
during future builds:
$ ls -l config/feature-checks/test-libaudit
-rwxrwxr-x 1 mingo mingo 7264 Sep 30 18:31 config/feature-checks/test-libaudit
and it's only remade if its underlying system libraries are changed:
removed or updated. Note that unlike the hack-version I sent before,
cached features immediately turn into 'OFF' state once a dependent package
is removed.
'OFF' features failed to pass due to missing dependencies. Those are
re-made on every build. The fewer of them, the faster the build gets.
The series also streamlines feature names as exposed to the rest of perf:
it standardizes on the HAVE_{FEATURE}_SUPPORT pattern. We used to have a
mixture of HAVE_{FEATURE} and {FEATURE}_SUPPORT constants.
There's still the annoyance left that Linus reported originally: that the
config pass is repeated twice on 'make install' - this results in the
feature matrix being output twice.
Comments, suggestions, bugreports are welcome.
Thanks,
Ingo
----------------------->
Subject: perf autodep: Implement faster auto-detection of features
From: Ingo Molnar <[email protected]>
Date: Mon Sep 30 18:14:50 CEST 2013
---
tools/perf/Makefile | 1
tools/perf/arch/x86/include/perf_regs.h | 6
tools/perf/arch/x86/util/unwind.c | 4
tools/perf/bench/mem-memcpy-arch.h | 2
tools/perf/bench/mem-memcpy.c | 2
tools/perf/bench/mem-memset-arch.h | 2
tools/perf/bench/mem-memset.c | 2
tools/perf/builtin-bench.c | 4
tools/perf/builtin-inject.c | 2
tools/perf/builtin-probe.c | 14
tools/perf/builtin-record.c | 12
tools/perf/config/Makefile | 237 +++++-----
tools/perf/config/feature-checks/Makefile | 110 ++++
tools/perf/config/feature-checks/test-backtrace.c | 10
tools/perf/config/feature-checks/test-bionic.c | 6
tools/perf/config/feature-checks/test-dwarf.c | 9
tools/perf/config/feature-checks/test-fortify-source.c | 6
tools/perf/config/feature-checks/test-glibc.c | 8
tools/perf/config/feature-checks/test-gtk2-infobar.c | 10
tools/perf/config/feature-checks/test-gtk2.c | 10
tools/perf/config/feature-checks/test-hello.c | 6
tools/perf/config/feature-checks/test-libaudit.c | 7
tools/perf/config/feature-checks/test-libbfd.c | 7
tools/perf/config/feature-checks/test-libelf-getphdrnum.c | 7
tools/perf/config/feature-checks/test-libelf-mmap.c | 7
tools/perf/config/feature-checks/test-libelf.c | 7
tools/perf/config/feature-checks/test-libnuma.c | 8
tools/perf/config/feature-checks/test-libperl.c | 9
tools/perf/config/feature-checks/test-libpython-version.c | 10
tools/perf/config/feature-checks/test-libpython.c | 7
tools/perf/config/feature-checks/test-libslang.c | 6
tools/perf/config/feature-checks/test-libunwind.c | 20
tools/perf/config/feature-checks/test-on-exit.c | 6
tools/perf/config/feature-checks/test-stackprotector-all.c | 6
tools/perf/config/feature-checks/test-stackprotector.c | 6
tools/perf/config/feature-checks/test-strlcpy.c | 8
tools/perf/config/feature-checks/test-volatile-register-var.c | 6
tools/perf/perf.c | 4
tools/perf/ui/gtk/browser.c | 2
tools/perf/ui/gtk/gtk.h | 4
tools/perf/ui/gtk/util.c | 4
tools/perf/ui/ui.h | 4
tools/perf/util/annotate.h | 4
tools/perf/util/cache.h | 2
tools/perf/util/generate-cmdlist.sh | 4
tools/perf/util/hist.h | 4
tools/perf/util/include/dwarf-regs.h | 2
tools/perf/util/map.c | 2
tools/perf/util/path.c | 2
tools/perf/util/perf_regs.h | 4
tools/perf/util/probe-event.c | 4
tools/perf/util/probe-finder.h | 4
tools/perf/util/symbol-elf.c | 2
tools/perf/util/symbol.h | 8
tools/perf/util/unwind.h | 4
tools/perf/util/util.c | 4
56 files changed, 491 insertions(+), 167 deletions(-)
Index: tip/tools/perf/Makefile
===================================================================
--- tip.orig/tools/perf/Makefile
+++ tip/tools/perf/Makefile
@@ -816,3 +816,4 @@ clean: $(LIBTRACEEVENT)-clean $(LIBLK)-c
.PHONY: all install clean strip $(LIBTRACEEVENT) $(LIBLK)
.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
.PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS
+
Index: tip/tools/perf/arch/x86/include/perf_regs.h
===================================================================
--- tip.orig/tools/perf/arch/x86/include/perf_regs.h
+++ tip/tools/perf/arch/x86/include/perf_regs.h
@@ -5,7 +5,7 @@
#include "../../util/types.h"
#include <asm/perf_regs.h>
-#ifndef ARCH_X86_64
+#ifndef HAVE_ARCH_X86_64_SUPPORT
#define PERF_REGS_MASK ((1ULL << PERF_REG_X86_32_MAX) - 1)
#else
#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \
@@ -52,7 +52,7 @@ static inline const char *perf_reg_name(
return "FS";
case PERF_REG_X86_GS:
return "GS";
-#ifdef ARCH_X86_64
+#ifdef HAVE_ARCH_X86_64_SUPPORT
case PERF_REG_X86_R8:
return "R8";
case PERF_REG_X86_R9:
@@ -69,7 +69,7 @@ static inline const char *perf_reg_name(
return "R14";
case PERF_REG_X86_R15:
return "R15";
-#endif /* ARCH_X86_64 */
+#endif /* HAVE_ARCH_X86_64_SUPPORT */
default:
return NULL;
}
Index: tip/tools/perf/arch/x86/util/unwind.c
===================================================================
--- tip.orig/tools/perf/arch/x86/util/unwind.c
+++ tip/tools/perf/arch/x86/util/unwind.c
@@ -4,7 +4,7 @@
#include "perf_regs.h"
#include "../../util/unwind.h"
-#ifdef ARCH_X86_64
+#ifdef HAVE_ARCH_X86_64_SUPPORT
int unwind__arch_reg_id(int regnum)
{
int id;
@@ -108,4 +108,4 @@ int unwind__arch_reg_id(int regnum)
return id;
}
-#endif /* ARCH_X86_64 */
+#endif /* HAVE_ARCH_X86_64_SUPPORT */
Index: tip/tools/perf/bench/mem-memcpy-arch.h
===================================================================
--- tip.orig/tools/perf/bench/mem-memcpy-arch.h
+++ tip/tools/perf/bench/mem-memcpy-arch.h
@@ -1,5 +1,5 @@
-#ifdef ARCH_X86_64
+#ifdef HAVE_ARCH_X86_64_SUPPORT
#define MEMCPY_FN(fn, name, desc) \
extern void *fn(void *, const void *, size_t);
Index: tip/tools/perf/bench/mem-memcpy.c
===================================================================
--- tip.orig/tools/perf/bench/mem-memcpy.c
+++ tip/tools/perf/bench/mem-memcpy.c
@@ -58,7 +58,7 @@ struct routine routines[] = {
{ "default",
"Default memcpy() provided by glibc",
memcpy },
-#ifdef ARCH_X86_64
+#ifdef HAVE_ARCH_X86_64_SUPPORT
#define MEMCPY_FN(fn, name, desc) { name, desc, fn },
#include "mem-memcpy-x86-64-asm-def.h"
Index: tip/tools/perf/bench/mem-memset-arch.h
===================================================================
--- tip.orig/tools/perf/bench/mem-memset-arch.h
+++ tip/tools/perf/bench/mem-memset-arch.h
@@ -1,5 +1,5 @@
-#ifdef ARCH_X86_64
+#ifdef HAVE_ARCH_X86_64_SUPPORT
#define MEMSET_FN(fn, name, desc) \
extern void *fn(void *, int, size_t);
Index: tip/tools/perf/bench/mem-memset.c
===================================================================
--- tip.orig/tools/perf/bench/mem-memset.c
+++ tip/tools/perf/bench/mem-memset.c
@@ -58,7 +58,7 @@ static const struct routine routines[] =
{ "default",
"Default memset() provided by glibc",
memset },
-#ifdef ARCH_X86_64
+#ifdef HAVE_ARCH_X86_64_SUPPORT
#define MEMSET_FN(fn, name, desc) { name, desc, fn },
#include "mem-memset-x86-64-asm-def.h"
Index: tip/tools/perf/builtin-bench.c
===================================================================
--- tip.orig/tools/perf/builtin-bench.c
+++ tip/tools/perf/builtin-bench.c
@@ -35,7 +35,7 @@ struct bench_suite {
/* sentinel: easy for help */
#define suite_all { "all", "Test all benchmark suites", NULL }
-#ifdef LIBNUMA_SUPPORT
+#ifdef HAVE_LIBNUMA_SUPPORT
static struct bench_suite numa_suites[] = {
{ "mem",
"Benchmark for NUMA workloads",
@@ -80,7 +80,7 @@ struct bench_subsys {
};
static struct bench_subsys subsystems[] = {
-#ifdef LIBNUMA_SUPPORT
+#ifdef HAVE_LIBNUMA_SUPPORT
{ "numa",
"NUMA scheduling and MM behavior",
numa_suites },
Index: tip/tools/perf/builtin-inject.c
===================================================================
--- tip.orig/tools/perf/builtin-inject.c
+++ tip/tools/perf/builtin-inject.c
@@ -231,7 +231,7 @@ static int perf_event__inject_buildid(st
* account this as unresolved.
*/
} else {
-#ifdef LIBELF_SUPPORT
+#ifdef HAVE_LIBELF_SUPPORT
pr_warning("no symbols found in %s, maybe "
"install a debug package?\n",
al.map->dso->long_name);
Index: tip/tools/perf/builtin-probe.c
===================================================================
--- tip.orig/tools/perf/builtin-probe.c
+++ tip/tools/perf/builtin-probe.c
@@ -173,7 +173,7 @@ static int opt_set_target(const struct o
if (str && !params.target) {
if (!strcmp(opt->long_name, "exec"))
params.uprobes = true;
-#ifdef DWARF_SUPPORT
+#ifdef HAVE_DWARF_SUPPORT
else if (!strcmp(opt->long_name, "module"))
params.uprobes = false;
#endif
@@ -187,7 +187,7 @@ static int opt_set_target(const struct o
return ret;
}
-#ifdef DWARF_SUPPORT
+#ifdef HAVE_DWARF_SUPPORT
static int opt_show_lines(const struct option *opt __maybe_unused,
const char *str, int unset __maybe_unused)
{
@@ -257,7 +257,7 @@ int cmd_probe(int argc, const char **arg
"perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]",
"perf probe [<options>] --del '[GROUP:]EVENT' ...",
"perf probe --list",
-#ifdef DWARF_SUPPORT
+#ifdef HAVE_DWARF_SUPPORT
"perf probe [<options>] --line 'LINEDESC'",
"perf probe [<options>] --vars 'PROBEPOINT'",
#endif
@@ -271,7 +271,7 @@ int cmd_probe(int argc, const char **arg
OPT_CALLBACK('d', "del", NULL, "[GROUP:]EVENT", "delete a probe event.",
opt_del_probe_event),
OPT_CALLBACK('a', "add", NULL,
-#ifdef DWARF_SUPPORT
+#ifdef HAVE_DWARF_SUPPORT
"[EVENT=]FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT"
" [[NAME=]ARG ...]",
#else
@@ -283,7 +283,7 @@ int cmd_probe(int argc, const char **arg
"\t\tFUNC:\tFunction name\n"
"\t\tOFF:\tOffset from function entry (in byte)\n"
"\t\t%return:\tPut the probe at function return\n"
-#ifdef DWARF_SUPPORT
+#ifdef HAVE_DWARF_SUPPORT
"\t\tSRC:\tSource code path\n"
"\t\tRL:\tRelative line number from function entry.\n"
"\t\tAL:\tAbsolute line number in file.\n"
@@ -296,7 +296,7 @@ int cmd_probe(int argc, const char **arg
opt_add_probe_event),
OPT_BOOLEAN('f', "force", &params.force_add, "forcibly add events"
" with existing name"),
-#ifdef DWARF_SUPPORT
+#ifdef HAVE_DWARF_SUPPORT
OPT_CALLBACK('L', "line", NULL,
"FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]",
"Show source code lines.", opt_show_lines),
@@ -408,7 +408,7 @@ int cmd_probe(int argc, const char **arg
return ret;
}
-#ifdef DWARF_SUPPORT
+#ifdef HAVE_DWARF_SUPPORT
if (params.show_lines && !params.uprobes) {
if (params.mod_events) {
pr_err(" Error: Don't use --line with"
Index: tip/tools/perf/builtin-record.c
===================================================================
--- tip.orig/tools/perf/builtin-record.c
+++ tip/tools/perf/builtin-record.c
@@ -29,7 +29,7 @@
#include <sched.h>
#include <sys/mman.h>
-#ifndef HAVE_ON_EXIT
+#ifndef HAVE_ON_EXIT_SUPPORT
#ifndef ATEXIT_MAX
#define ATEXIT_MAX 32
#endif
@@ -684,7 +684,7 @@ error:
return ret;
}
-#ifdef LIBUNWIND_SUPPORT
+#ifdef HAVE_LIBUNWIND_SUPPORT
static int get_stack_size(char *str, unsigned long *_size)
{
char *endptr;
@@ -710,7 +710,7 @@ static int get_stack_size(char *str, uns
max_size, str);
return -1;
}
-#endif /* LIBUNWIND_SUPPORT */
+#endif /* HAVE_LIBUNWIND_SUPPORT */
int record_parse_callchain_opt(const struct option *opt,
const char *arg, int unset)
@@ -748,7 +748,7 @@ int record_parse_callchain_opt(const str
"needed for -g fp\n");
break;
-#ifdef LIBUNWIND_SUPPORT
+#ifdef HAVE_LIBUNWIND_SUPPORT
/* Dwarf style */
} else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
const unsigned long default_stack_dump_size = 8192;
@@ -768,7 +768,7 @@ int record_parse_callchain_opt(const str
if (!ret)
pr_debug("callchain: stack dump size %d\n",
opts->stack_dump_size);
-#endif /* LIBUNWIND_SUPPORT */
+#endif /* HAVE_LIBUNWIND_SUPPORT */
} else {
pr_err("callchain: Unknown -g option "
"value: %s\n", arg);
@@ -815,7 +815,7 @@ static struct perf_record record = {
#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "
-#ifdef LIBUNWIND_SUPPORT
+#ifdef HAVE_LIBUNWIND_SUPPORT
const char record_callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
#else
const char record_callchain_help[] = CALLCHAIN_HELP "[fp]";
Index: tip/tools/perf/config/Makefile
===================================================================
--- tip.orig/tools/perf/config/Makefile
+++ tip/tools/perf/config/Makefile
@@ -23,7 +23,7 @@ ifeq ($(ARCH),x86_64)
endif
ifeq (${IS_X86_64}, 1)
RAW_ARCH := x86_64
- CFLAGS += -DARCH_X86_64
+ CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT
ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S
endif
NO_PERF_REGS := 0
@@ -31,7 +31,7 @@ ifeq ($(ARCH),x86_64)
endif
ifeq ($(NO_PERF_REGS),0)
- CFLAGS += -DHAVE_PERF_REGS
+ CFLAGS += -DHAVE_PERF_REGS_SUPPORT
endif
ifeq ($(src-perf),)
@@ -89,20 +89,55 @@ CFLAGS += -std=gnu99
EXTLIBS = -lelf -lpthread -lrt -lm -ldl
-ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -Werror -fstack-protector-all,-fstack-protector-all),y)
+feature_check = $(eval $(feature_check_code))
+define feature_check_code
+ feature-$(1) := $(shell make -C config/feature-checks test-$1 >/dev/null 2>/dev/null && echo 1 || echo 0)
+endef
+
+#
+# Build the feature check binaries in parallel, ignore errors, ignore return value and suppress output:
+#
+$(info )
+$(info Auto-detecting system features:)
+$(shell make -i -j -C config/feature-checks >/dev/null 2>&1)
+
+FEATURE_TESTS = stackprotector-all volatile-register-var fortify-source libelf libelf-mmap glibc dwarf libelf-getphdrnum libunwind libaudit libslang gtk2 gtk2-infobar libperl libpython libpython-version libbfd strlcpy on-exit backtrace libnuma
+
+$(foreach feat,$(FEATURE_TESTS),$(call feature_check,$(feat)))
+
+feature_print = $(eval $(feature_print_code))
+
+#
+# Print the result of the feature test:
+#
+define feature_print_code
+ ifeq ($(feature-$(1)), 1)
+ MSG := $(shell printf '...%30s: [ \033[32mon\033[m ]' $(1))
+ else
+ MSG := $(shell printf '...%30s: [ \033[31mOFF\033[m ]' $(1))
+ endif
+ $(info $(MSG))
+endef
+
+$(foreach feat,$(FEATURE_TESTS) DUMMY,$(call feature_print,$(feat)))
+
+# newline at the end of the feature printouts:
+$(info )
+
+ifeq ($(feature-stackprotector-all), 1)
CFLAGS += -fstack-protector-all
endif
-ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -Werror -Wstack-protector,-Wstack-protector),y)
+ifeq ($(feature-stackprotector), 1)
CFLAGS += -Wstack-protector
endif
-ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -Werror -Wvolatile-register-var,-Wvolatile-register-var),y)
+ifeq ($(feature-volatile-register-var), 1)
CFLAGS += -Wvolatile-register-var
endif
ifndef PERF_DEBUG
- ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -D_FORTIFY_SOURCE=2,-D_FORTIFY_SOURCE=2),y)
+ ifeq ($(feature-fortify-source), 1)
CFLAGS += -D_FORTIFY_SOURCE=2
endif
endif
@@ -128,84 +163,82 @@ CFLAGS += -I$(LIB_INCLUDE)
CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
ifndef NO_BIONIC
-ifeq ($(call try-cc,$(SOURCE_BIONIC),$(CFLAGS),bionic),y)
- BIONIC := 1
- EXTLIBS := $(filter-out -lrt,$(EXTLIBS))
- EXTLIBS := $(filter-out -lpthread,$(EXTLIBS))
+ $(feature_check,bionic)
+ ifeq ($(feature-bionic), 1)
+ BIONIC := 1
+ EXTLIBS := $(filter-out -lrt,$(EXTLIBS))
+ EXTLIBS := $(filter-out -lpthread,$(EXTLIBS))
+ endif
endif
-endif # NO_BIONIC
ifdef NO_LIBELF
NO_DWARF := 1
NO_DEMANGLE := 1
NO_LIBUNWIND := 1
else
-FLAGS_LIBELF=$(CFLAGS) $(LDFLAGS) $(EXTLIBS)
-ifneq ($(call try-cc,$(SOURCE_LIBELF),$(FLAGS_LIBELF),libelf),y)
- FLAGS_GLIBC=$(CFLAGS) $(LDFLAGS)
- ifeq ($(call try-cc,$(SOURCE_GLIBC),$(FLAGS_GLIBC),glibc),y)
- LIBC_SUPPORT := 1
- endif
- ifeq ($(BIONIC),1)
- LIBC_SUPPORT := 1
- endif
- ifeq ($(LIBC_SUPPORT),1)
- msg := $(warning No libelf found, disables 'probe' tool, please install elfutils-libelf-devel/libelf-dev);
-
- NO_LIBELF := 1
- NO_DWARF := 1
- NO_DEMANGLE := 1
+ ifeq ($(feature-libelf), 0)
+ ifeq ($(feature-glibc), 1)
+ LIBC_SUPPORT := 1
+ endif
+ ifeq ($(BIONIC),1)
+ LIBC_SUPPORT := 1
+ endif
+ ifeq ($(LIBC_SUPPORT),1)
+ msg := $(warning No libelf found, disables 'probe' tool, please install elfutils-libelf-devel/libelf-dev);
+
+ NO_LIBELF := 1
+ NO_DWARF := 1
+ NO_DEMANGLE := 1
+ else
+ msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static);
+ endif
else
- msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static);
- endif
-else
- # for linking with debug library, run like:
- # make DEBUG=1 LIBDW_DIR=/opt/libdw/
- ifdef LIBDW_DIR
- LIBDW_CFLAGS := -I$(LIBDW_DIR)/include
- LIBDW_LDFLAGS := -L$(LIBDW_DIR)/lib
- endif
-
- FLAGS_DWARF=$(CFLAGS) $(LIBDW_CFLAGS) -ldw -lz -lelf $(LIBDW_LDFLAGS) $(LDFLAGS) $(EXTLIBS)
- ifneq ($(call try-cc,$(SOURCE_DWARF),$(FLAGS_DWARF),libdw),y)
- msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev);
- NO_DWARF := 1
- endif # Dwarf support
-endif # SOURCE_LIBELF
+ # for linking with debug library, run like:
+ # make DEBUG=1 LIBDW_DIR=/opt/libdw/
+ ifdef LIBDW_DIR
+ LIBDW_CFLAGS := -I$(LIBDW_DIR)/include
+ LIBDW_LDFLAGS := -L$(LIBDW_DIR)/lib
+ endif
+
+ ifneq ($(feature-dwarf), 1)
+ msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev);
+ NO_DWARF := 1
+ endif # Dwarf support
+ endif # SOURCE_LIBELF
endif # NO_LIBELF
ifndef NO_LIBELF
-CFLAGS += -DLIBELF_SUPPORT
-FLAGS_LIBELF=$(CFLAGS) $(LDFLAGS) $(EXTLIBS)
-ifeq ($(call try-cc,$(SOURCE_ELF_MMAP),$(FLAGS_LIBELF),-DLIBELF_MMAP),y)
- CFLAGS += -DLIBELF_MMAP
-endif
-ifeq ($(call try-cc,$(SOURCE_ELF_GETPHDRNUM),$(FLAGS_LIBELF),-DHAVE_ELF_GETPHDRNUM),y)
- CFLAGS += -DHAVE_ELF_GETPHDRNUM
-endif
+ CFLAGS += -DHAVE_LIBELF_SUPPORT
+ FLAGS_LIBELF=$(CFLAGS) $(LDFLAGS) $(EXTLIBS)
-# include ARCH specific config
--include $(src-perf)/arch/$(ARCH)/Makefile
+ ifeq ($(feature-libelf-mmap), 1)
+ CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT
+ endif
-ifndef NO_DWARF
-ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
- msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled);
- NO_DWARF := 1
-else
- CFLAGS += -DDWARF_SUPPORT $(LIBDW_CFLAGS)
- LDFLAGS += $(LIBDW_LDFLAGS)
- EXTLIBS += -lelf -ldw
-endif # PERF_HAVE_DWARF_REGS
-endif # NO_DWARF
+ ifeq ($(feature-libelf-getphdrnum), 1)
+ CFLAGS += -DHAVE_ELF_GETPHDRNUM_SUPPORT
+ endif
+
+ # include ARCH specific config
+ -include $(src-perf)/arch/$(ARCH)/Makefile
+ ifndef NO_DWARF
+ ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
+ msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled);
+ NO_DWARF := 1
+ else
+ CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS)
+ LDFLAGS += $(LIBDW_LDFLAGS)
+ EXTLIBS += -lelf -ldw
+ endif # PERF_HAVE_DWARF_REGS
+ endif # NO_DWARF
endif # NO_LIBELF
ifndef NO_LIBELF
-CFLAGS += -DLIBELF_SUPPORT
-FLAGS_LIBELF=$(CFLAGS) $(LDFLAGS) $(EXTLIBS)
-ifeq ($(call try-cc,$(SOURCE_ELF_MMAP),$(FLAGS_LIBELF),-DLIBELF_MMAP),y)
- CFLAGS += -DLIBELF_MMAP
-endif # try-cc
+ CFLAGS += -DHAVE_LIBELF_SUPPORT
+ ifeq ($(feature-libelf-mmap), 1)
+ CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT
+ endif
endif # NO_LIBELF
# There's only x86 (both 32 and 64) support for CFI unwind so far
@@ -214,34 +247,25 @@ ifneq ($(ARCH),x86)
endif
ifndef NO_LIBUNWIND
-# for linking with debug library, run like:
-# make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/
-ifdef LIBUNWIND_DIR
- LIBUNWIND_CFLAGS := -I$(LIBUNWIND_DIR)/include
- LIBUNWIND_LDFLAGS := -L$(LIBUNWIND_DIR)/lib
+ ifneq ($(feature-libunwind), 1)
+ msg := $(warning No libunwind found, disabling post unwind support. Please install libunwind-dev[el] >= 0.99);
+ NO_LIBUNWIND := 1
+ endif
endif
-FLAGS_UNWIND=$(LIBUNWIND_CFLAGS) $(CFLAGS) $(LIBUNWIND_LDFLAGS) $(LDFLAGS) $(EXTLIBS) $(LIBUNWIND_LIBS)
-ifneq ($(call try-cc,$(SOURCE_LIBUNWIND),$(FLAGS_UNWIND),libunwind),y)
- msg := $(warning No libunwind found, disabling post unwind support. Please install libunwind-dev[el] >= 0.99);
- NO_LIBUNWIND := 1
-endif # Libunwind support
-endif # NO_LIBUNWIND
-
ifndef NO_LIBUNWIND
- CFLAGS += -DLIBUNWIND_SUPPORT
+ CFLAGS += -DHAVE_LIBUNWIND_SUPPORT
EXTLIBS += $(LIBUNWIND_LIBS)
CFLAGS += $(LIBUNWIND_CFLAGS)
LDFLAGS += $(LIBUNWIND_LDFLAGS)
-endif # NO_LIBUNWIND
+endif
ifndef NO_LIBAUDIT
- FLAGS_LIBAUDIT = $(CFLAGS) $(LDFLAGS) -laudit
- ifneq ($(call try-cc,$(SOURCE_LIBAUDIT),$(FLAGS_LIBAUDIT),libaudit),y)
+ ifneq ($(feature-libaudit), 1)
msg := $(warning No libaudit.h found, disables 'trace' tool, please install audit-libs-devel or libaudit-dev);
NO_LIBAUDIT := 1
else
- CFLAGS += -DLIBAUDIT_SUPPORT
+ CFLAGS += -DHAVE_LIBAUDIT_SUPPORT
EXTLIBS += -laudit
endif
endif
@@ -251,28 +275,27 @@ ifdef NO_NEWT
endif
ifndef NO_SLANG
- FLAGS_SLANG=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) -I/usr/include/slang -lslang
- ifneq ($(call try-cc,$(SOURCE_SLANG),$(FLAGS_SLANG),libslang),y)
+ ifneq ($(feature-libslang), 1)
msg := $(warning slang not found, disables TUI support. Please install slang-devel or libslang-dev);
NO_SLANG := 1
else
# Fedora has /usr/include/slang/slang.h, but ubuntu /usr/include/slang.h
CFLAGS += -I/usr/include/slang
- CFLAGS += -DSLANG_SUPPORT
+ CFLAGS += -DHAVE_SLANG_SUPPORT
EXTLIBS += -lslang
endif
endif
ifndef NO_GTK2
FLAGS_GTK2=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) $(shell pkg-config --libs --cflags gtk+-2.0 2>/dev/null)
- ifneq ($(call try-cc,$(SOURCE_GTK2),$(FLAGS_GTK2),gtk2),y)
+ ifneq ($(feature-gtk2), 1)
msg := $(warning GTK2 not found, disables GTK2 support. Please install gtk2-devel or libgtk2.0-dev);
NO_GTK2 := 1
else
- ifeq ($(call try-cc,$(SOURCE_GTK2_INFOBAR),$(FLAGS_GTK2),-DHAVE_GTK_INFO_BAR),y)
- CFLAGS += -DHAVE_GTK_INFO_BAR
+ ifeq ($(feature-gtk2-infobar), 1)
+ CFLAGS += -DHAVE_GTK_INFO_BAR_SUPPORT
endif
- CFLAGS += -DGTK2_SUPPORT
+ CFLAGS += -DHAVE_GTK2_SUPPORT
CFLAGS += $(shell pkg-config --cflags gtk+-2.0 2>/dev/null)
EXTLIBS += $(shell pkg-config --libs gtk+-2.0 2>/dev/null)
endif
@@ -290,7 +313,7 @@ else
PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null`
FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
- ifneq ($(call try-cc,$(SOURCE_PERL_EMBED),$(FLAGS_PERL_EMBED),perl),y)
+ ifneq ($(feature-libperl), 1)
CFLAGS += -DNO_LIBPERL
NO_LIBPERL := 1
else
@@ -335,11 +358,11 @@ else
PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
- ifneq ($(call try-cc,$(SOURCE_PYTHON_EMBED),$(FLAGS_PYTHON_EMBED),python),y)
+ ifneq ($(feature-libpython), 1)
$(call disable-python,Python.h (for Python 2.x))
else
- ifneq ($(call try-cc,$(SOURCE_PYTHON_VERSION),$(FLAGS_PYTHON_EMBED),python version),y)
+ ifneq ($(feature-libpython-version), 1)
$(warning Python 3 is not yet supported; please set)
$(warning PYTHON and/or PYTHON_CONFIG appropriately.)
$(warning If you also have Python 2 installed, then)
@@ -365,13 +388,12 @@ endif
ifdef NO_DEMANGLE
CFLAGS += -DNO_DEMANGLE
else
- ifdef HAVE_CPLUS_DEMANGLE
+ ifdef HAVE_CPLUS_DEMANGLE_SUPPORT
EXTLIBS += -liberty
- CFLAGS += -DHAVE_CPLUS_DEMANGLE
+ CFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT
else
FLAGS_BFD=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) -DPACKAGE='perf' -lbfd
- has_bfd := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD),libbfd)
- ifeq ($(has_bfd),y)
+ ifeq ($(feature-libbfd), 1)
EXTLIBS += -lbfd
else
FLAGS_BFD_IBERTY=$(FLAGS_BFD) -liberty
@@ -388,7 +410,7 @@ else
has_cplus_demangle := $(call try-cc,$(SOURCE_CPLUS_DEMANGLE),$(FLAGS_CPLUS_DEMANGLE),demangle)
ifeq ($(has_cplus_demangle),y)
EXTLIBS += -liberty
- CFLAGS += -DHAVE_CPLUS_DEMANGLE
+ CFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT
else
msg := $(warning No bfd.h/libbfd found, install binutils-dev[el]/zlib-static to gain symbol demangling)
CFLAGS += -DNO_DEMANGLE
@@ -400,30 +422,29 @@ else
endif
ifndef NO_STRLCPY
- ifeq ($(call try-cc,$(SOURCE_STRLCPY),,-DHAVE_STRLCPY),y)
- CFLAGS += -DHAVE_STRLCPY
+ ifeq ($(feature-strlcpy), 1)
+ CFLAGS += -DHAVE_STRLCPY_SUPPORT
endif
endif
ifndef NO_ON_EXIT
- ifeq ($(call try-cc,$(SOURCE_ON_EXIT),,-DHAVE_ON_EXIT),y)
- CFLAGS += -DHAVE_ON_EXIT
+ ifeq ($(feature-on-exit), 1)
+ CFLAGS += -DHAVE_ON_EXIT_SUPPORT
endif
endif
ifndef NO_BACKTRACE
- ifeq ($(call try-cc,$(SOURCE_BACKTRACE),,-DBACKTRACE_SUPPORT),y)
- CFLAGS += -DBACKTRACE_SUPPORT
+ ifeq ($(feature-backtrace), 1)
+ CFLAGS += -DHAVE_BACKTRACE_SUPPORT
endif
endif
ifndef NO_LIBNUMA
- FLAGS_LIBNUMA = $(CFLAGS) $(LDFLAGS) -lnuma
- ifneq ($(call try-cc,$(SOURCE_LIBNUMA),$(FLAGS_LIBNUMA),libnuma),y)
+ ifeq ($(feature-libnuma), 0)
msg := $(warning No numa.h found, disables 'perf bench numa mem' benchmark, please install numa-libs-devel or libnuma-dev);
NO_LIBNUMA := 1
else
- CFLAGS += -DLIBNUMA_SUPPORT
+ CFLAGS += -DHAVE_LIBNUMA_SUPPORT
EXTLIBS += -lnuma
endif
endif
Index: tip/tools/perf/config/feature-checks/Makefile
===================================================================
--- /dev/null
+++ tip/tools/perf/config/feature-checks/Makefile
@@ -0,0 +1,110 @@
+
+FILES=test-hello test-dwarf test-libelf test-glibc test-bionic test-libnuma test-stackprotector-all test-stackprotector test-volatile-register-var test-fortify-source test-libelf-mmap test-libelf-getphdrnum test-libunwind test-libaudit test-libslang test-gtk2 test-gtk2-infobar test-libperl test-libpython test-libpython-version test-libbfd test-strlcp test-on-exit backtrace
+
+CC := $(CC) -MD
+
+all: $(FILES)
+
+###############################
+
+test-hello: test-hello.c
+ $(CC) -o $@ $@.c
+
+test-stackprotector-all: test-stackprotector-all.c
+ $(CC) -o $@ $@.c -Werror -fstack-protector-all
+
+test-stackprotector: test-stackprotector.c
+ $(CC) -o $@ $@.c -Werror -fstack-protector
+
+test-volatile-register-var: test-volatile-register-var.c
+ $(CC) -o $@ $@.c -Werror -Wvolatile-register-var
+
+test-fortify-source: test-fortify-source.c
+ $(CC) -o $@ $@.c -O2 -Werror -D_FORTIFY_SOURCE=2
+
+test-dwarf: test-dwarf.c
+ $(CC) -o $@ $@.c -ldw
+
+test-libelf: test-libelf.c
+ $(CC) -o $@ $@.c -lelf
+
+test-libelf-mmap: test-libelf-mmap.c
+ $(CC) -o $@ $@.c -lelf
+
+test-libelf-getphdrnum: test-libelf-getphdrnum.c
+ $(CC) -o $@ $@.c -lelf
+
+test-glibc: test-glibc.c
+ $(CC) -o $@ $@.c
+
+test-bionic: test-bionic.c
+ $(CC) -o $@ $@.c
+
+test-libnuma: test-libnuma.c
+ $(CC) -o $@ $@.c -lnuma
+
+test-libunwind: test-libunwind.c
+ $(CC) -o $@ $@.c -lunwind -lunwind-x86_64 -lelf
+
+test-libaudit: test-libaudit.c
+ $(CC) -o $@ $@.c -laudit
+
+test-libslang: test-libslang.c
+ $(CC) -o $@ $@.c -I/usr/include/slang -lslang
+
+test-gtk2: test-gtk2.c
+ $(CC) -o $@ $@.c $(shell pkg-config --libs --cflags gtk+-2.0 2>/dev/null)
+
+test-gtk2-infobar: test-gtk2-infobar.c
+ $(CC) -o $@ $@.c $(shell pkg-config --libs --cflags gtk+-2.0 2>/dev/null)
+
+grep-libs = $(filter -l%,$(1))
+strip-libs = $(filter-out -l%,$(1))
+
+PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null)
+PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS))
+PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS))
+PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null`
+FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
+
+test-libperl: test-libperl.c
+ $(CC) -o $@ $@.c $(FLAGS_PERL_EMBED)
+
+override PYTHON := python
+override PYTHON_CONFIG := python-config
+
+escape-for-shell-sq = $(subst ','\'',$(1))
+shell-sq = '$(escape-for-shell-sq)'
+
+PYTHON_CONFIG_SQ = $(call shell-sq,$(PYTHON_CONFIG))
+
+PYTHON_EMBED_LDOPTS = $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null)
+PYTHON_EMBED_LDFLAGS = $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
+PYTHON_EMBED_LIBADD = $(call grep-libs,$(PYTHON_EMBED_LDOPTS))
+PYTHON_EMBED_CCOPTS = $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
+FLAGS_PYTHON_EMBED = $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
+
+test-libpython: test-libpython.c
+ $(CC) -o $@ $@.c $(FLAGS_PYTHON_EMBED)
+
+test-libpython-version: test-libpython-version.c
+ $(CC) -o $@ $@.c $(FLAGS_PYTHON_EMBED)
+
+test-libbfd: test-libbfd.c
+ $(CC) -o $@ $@.c -DPACKAGE='perf' -DPACKAGE=perf -lbfd -ldl
+
+test-strlcpy: test-strlcpy.c
+ $(CC) -o $@ $@.c
+
+test-on-exit: test-on-exit.c
+ $(CC) -o $@ $@.c
+
+test-backtrace: test-backtrace.c
+ $(CC) -o $@ $@.c
+
+-include *.d */*.d
+
+###############################
+
+clean:
+ rm -f $(FILES) *.d
Index: tip/tools/perf/config/feature-checks/test-backtrace.c
===================================================================
--- /dev/null
+++ tip/tools/perf/config/feature-checks/test-backtrace.c
@@ -0,0 +1,10 @@
+#include <execinfo.h>
+#include <stdio.h>
+
+int main(void)
+{
+ backtrace(NULL, 0);
+ backtrace_symbols(NULL, 0);
+
+ return 0;
+}
Index: tip/tools/perf/config/feature-checks/test-bionic.c
===================================================================
--- /dev/null
+++ tip/tools/perf/config/feature-checks/test-bionic.c
@@ -0,0 +1,6 @@
+#include <android/api-level.h>
+
+int main(void)
+{
+ return __ANDROID_API__;
+}
Index: tip/tools/perf/config/feature-checks/test-dwarf.c
===================================================================
--- /dev/null
+++ tip/tools/perf/config/feature-checks/test-dwarf.c
@@ -0,0 +1,9 @@
+#include <dwarf.h>
+#include <elfutils/libdw.h>
+#include <elfutils/version.h>
+
+int main(void)
+{
+ Dwarf *dbg = dwarf_begin(0, DWARF_C_READ);
+ return (long)dbg;
+}
Index: tip/tools/perf/config/feature-checks/test-fortify-source.c
===================================================================
--- /dev/null
+++ tip/tools/perf/config/feature-checks/test-fortify-source.c
@@ -0,0 +1,6 @@
+#include <stdio.h>
+
+int main(void)
+{
+ return puts("hi");
+}
Index: tip/tools/perf/config/feature-checks/test-glibc.c
===================================================================
--- /dev/null
+++ tip/tools/perf/config/feature-checks/test-glibc.c
@@ -0,0 +1,8 @@
+#include <gnu/libc-version.h>
+
+int main(void)
+{
+ const char *version = gnu_get_libc_version();
+ return (long)version;
+}
+
Index: tip/tools/perf/config/feature-checks/test-gtk2-infobar.c
===================================================================
--- /dev/null
+++ tip/tools/perf/config/feature-checks/test-gtk2-infobar.c
@@ -0,0 +1,10 @@
+#pragma GCC diagnostic ignored "-Wstrict-prototypes"
+#include <gtk/gtk.h>
+#pragma GCC diagnostic error "-Wstrict-prototypes"
+
+int main(void)
+{
+ gtk_info_bar_new();
+
+ return 0;
+}
Index: tip/tools/perf/config/feature-checks/test-gtk2.c
===================================================================
--- /dev/null
+++ tip/tools/perf/config/feature-checks/test-gtk2.c
@@ -0,0 +1,10 @@
+#pragma GCC diagnostic ignored "-Wstrict-prototypes"
+#include <gtk/gtk.h>
+#pragma GCC diagnostic error "-Wstrict-prototypes"
+
+int main(int argc, char *argv[])
+{
+ gtk_init(&argc, &argv);
+
+ return 0;
+}
Index: tip/tools/perf/config/feature-checks/test-hello.c
===================================================================
--- /dev/null
+++ tip/tools/perf/config/feature-checks/test-hello.c
@@ -0,0 +1,6 @@
+#include <stdio.h>
+
+int main(void)
+{
+ return puts("hi");
+}
Index: tip/tools/perf/config/feature-checks/test-libaudit.c
===================================================================
--- /dev/null
+++ tip/tools/perf/config/feature-checks/test-libaudit.c
@@ -0,0 +1,7 @@
+#include <libaudit.h>
+
+int main(void)
+{
+ printf("error message: %s\n", audit_errno_to_name(0));
+ return audit_open();
+}
Index: tip/tools/perf/config/feature-checks/test-libbfd.c
===================================================================
--- /dev/null
+++ tip/tools/perf/config/feature-checks/test-libbfd.c
@@ -0,0 +1,7 @@
+#include <bfd.h>
+
+int main(void)
+{
+ bfd_demangle(0, 0, 0);
+ return 0;
+}
Index: tip/tools/perf/config/feature-checks/test-libelf-getphdrnum.c
===================================================================
--- /dev/null
+++ tip/tools/perf/config/feature-checks/test-libelf-getphdrnum.c
@@ -0,0 +1,7 @@
+#include <libelf.h>
+#
+int main(void)
+{
+ size_t dst;
+ return elf_getphdrnum(0, &dst);
+}
Index: tip/tools/perf/config/feature-checks/test-libelf-mmap.c
===================================================================
--- /dev/null
+++ tip/tools/perf/config/feature-checks/test-libelf-mmap.c
@@ -0,0 +1,7 @@
+#include <libelf.h>
+#
+int main(void)
+{
+ Elf *elf = elf_begin(0, ELF_C_READ_MMAP, 0);
+ return (long)elf;
+}
Index: tip/tools/perf/config/feature-checks/test-libelf.c
===================================================================
--- /dev/null
+++ tip/tools/perf/config/feature-checks/test-libelf.c
@@ -0,0 +1,7 @@
+#include <libelf.h>
+
+int main(void)
+{
+ Elf *elf = elf_begin(0, ELF_C_READ, 0);
+ return (long)elf;
+}
Index: tip/tools/perf/config/feature-checks/test-libnuma.c
===================================================================
--- /dev/null
+++ tip/tools/perf/config/feature-checks/test-libnuma.c
@@ -0,0 +1,8 @@
+#include <numa.h>
+#include <numaif.h>
+
+int main(void)
+{
+ numa_available();
+ return 0;
+}
Index: tip/tools/perf/config/feature-checks/test-libperl.c
===================================================================
--- /dev/null
+++ tip/tools/perf/config/feature-checks/test-libperl.c
@@ -0,0 +1,9 @@
+#include <EXTERN.h>
+#include <perl.h>
+
+int main(void)
+{
+ perl_alloc();
+
+ return 0;
+}
Index: tip/tools/perf/config/feature-checks/test-libpython-version.c
===================================================================
--- /dev/null
+++ tip/tools/perf/config/feature-checks/test-libpython-version.c
@@ -0,0 +1,10 @@
+#include <Python.h>
+
+#if PY_VERSION_HEX >= 0x03000000
+ #error
+#endif
+
+int main(void)
+{
+ return 0;
+}
Index: tip/tools/perf/config/feature-checks/test-libpython.c
===================================================================
--- /dev/null
+++ tip/tools/perf/config/feature-checks/test-libpython.c
@@ -0,0 +1,7 @@
+#include <Python.h>
+#
+int main(void)
+{
+ Py_Initialize();
+ return 0;
+}
Index: tip/tools/perf/config/feature-checks/test-libslang.c
===================================================================
--- /dev/null
+++ tip/tools/perf/config/feature-checks/test-libslang.c
@@ -0,0 +1,6 @@
+#include <slang.h>
+
+int main(void)
+{
+ return SLsmg_init_smg();
+}
Index: tip/tools/perf/config/feature-checks/test-libunwind.c
===================================================================
--- /dev/null
+++ tip/tools/perf/config/feature-checks/test-libunwind.c
@@ -0,0 +1,20 @@
+#include <libunwind.h>
+#include <stdlib.h>
+
+extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
+ unw_word_t ip,
+ unw_dyn_info_t *di,
+ unw_proc_info_t *pi,
+ int need_unwind_info, void *arg);
+
+
+#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)
+
+int main(void)
+{
+ unw_addr_space_t addr_space;
+ addr_space = unw_create_addr_space(NULL, 0);
+ unw_init_remote(NULL, addr_space, NULL);
+ dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL);
+ return 0;
+}
Index: tip/tools/perf/config/feature-checks/test-on-exit.c
===================================================================
--- /dev/null
+++ tip/tools/perf/config/feature-checks/test-on-exit.c
@@ -0,0 +1,6 @@
+#include <stdio.h>
+
+int main(void)
+{
+ return on_exit(NULL, NULL);
+}
Index: tip/tools/perf/config/feature-checks/test-stackprotector-all.c
===================================================================
--- /dev/null
+++ tip/tools/perf/config/feature-checks/test-stackprotector-all.c
@@ -0,0 +1,6 @@
+#include <stdio.h>
+
+int main(void)
+{
+ return puts("hi");
+}
Index: tip/tools/perf/config/feature-checks/test-stackprotector.c
===================================================================
--- /dev/null
+++ tip/tools/perf/config/feature-checks/test-stackprotector.c
@@ -0,0 +1,6 @@
+#include <stdio.h>
+
+int main(void)
+{
+ return puts("hi");
+}
Index: tip/tools/perf/config/feature-checks/test-strlcpy.c
===================================================================
--- /dev/null
+++ tip/tools/perf/config/feature-checks/test-strlcpy.c
@@ -0,0 +1,8 @@
+#include <stdlib.h>
+extern size_t strlcpy(char *dest, const char *src, size_t size);
+
+int main(void)
+{
+ strlcpy(NULL, NULL, 0);
+ return 0;
+}
Index: tip/tools/perf/config/feature-checks/test-volatile-register-var.c
===================================================================
--- /dev/null
+++ tip/tools/perf/config/feature-checks/test-volatile-register-var.c
@@ -0,0 +1,6 @@
+#include <stdio.h>
+
+int main(void)
+{
+ return puts("hi");
+}
Index: tip/tools/perf/perf.c
===================================================================
--- tip.orig/tools/perf/perf.c
+++ tip/tools/perf/perf.c
@@ -49,14 +49,14 @@ static struct cmd_struct commands[] = {
{ "version", cmd_version, 0 },
{ "script", cmd_script, 0 },
{ "sched", cmd_sched, 0 },
-#ifdef LIBELF_SUPPORT
+#ifdef HAVE_LIBELF_SUPPORT
{ "probe", cmd_probe, 0 },
#endif
{ "kmem", cmd_kmem, 0 },
{ "lock", cmd_lock, 0 },
{ "kvm", cmd_kvm, 0 },
{ "test", cmd_test, 0 },
-#ifdef LIBAUDIT_SUPPORT
+#ifdef HAVE_LIBAUDIT_SUPPORT
{ "trace", cmd_trace, 0 },
#endif
{ "inject", cmd_inject, 0 },
Index: tip/tools/perf/ui/gtk/browser.c
===================================================================
--- tip.orig/tools/perf/ui/gtk/browser.c
+++ tip/tools/perf/ui/gtk/browser.c
@@ -43,7 +43,7 @@ const char *perf_gtk__get_percent_color(
return NULL;
}
-#ifdef HAVE_GTK_INFO_BAR
+#ifdef HAVE_GTK_INFO_BAR_SUPPORT
GtkWidget *perf_gtk__setup_info_bar(void)
{
GtkWidget *info_bar;
Index: tip/tools/perf/ui/gtk/gtk.h
===================================================================
--- tip.orig/tools/perf/ui/gtk/gtk.h
+++ tip/tools/perf/ui/gtk/gtk.h
@@ -12,7 +12,7 @@ struct perf_gtk_context {
GtkWidget *main_window;
GtkWidget *notebook;
-#ifdef HAVE_GTK_INFO_BAR
+#ifdef HAVE_GTK_INFO_BAR_SUPPORT
GtkWidget *info_bar;
GtkWidget *message_label;
#endif
@@ -39,7 +39,7 @@ void perf_gtk__resize_window(GtkWidget *
const char *perf_gtk__get_percent_color(double percent);
GtkWidget *perf_gtk__setup_statusbar(void);
-#ifdef HAVE_GTK_INFO_BAR
+#ifdef HAVE_GTK_INFO_BAR_SUPPORT
GtkWidget *perf_gtk__setup_info_bar(void);
#else
static inline GtkWidget *perf_gtk__setup_info_bar(void)
Index: tip/tools/perf/ui/gtk/util.c
===================================================================
--- tip.orig/tools/perf/ui/gtk/util.c
+++ tip/tools/perf/ui/gtk/util.c
@@ -53,7 +53,7 @@ static int perf_gtk__error(const char *f
return 0;
}
-#ifdef HAVE_GTK_INFO_BAR
+#ifdef HAVE_GTK_INFO_BAR_SUPPORT
static int perf_gtk__warning_info_bar(const char *format, va_list args)
{
char *msg;
@@ -105,7 +105,7 @@ static int perf_gtk__warning_statusbar(c
struct perf_error_ops perf_gtk_eops = {
.error = perf_gtk__error,
-#ifdef HAVE_GTK_INFO_BAR
+#ifdef HAVE_GTK_INFO_BAR_SUPPORT
.warning = perf_gtk__warning_info_bar,
#else
.warning = perf_gtk__warning_statusbar,
Index: tip/tools/perf/ui/ui.h
===================================================================
--- tip.orig/tools/perf/ui/ui.h
+++ tip/tools/perf/ui/ui.h
@@ -12,7 +12,7 @@ extern int use_browser;
void setup_browser(bool fallback_to_pager);
void exit_browser(bool wait_for_ok);
-#ifdef SLANG_SUPPORT
+#ifdef HAVE_SLANG_SUPPORT
int ui__init(void);
void ui__exit(bool wait_for_ok);
#else
@@ -23,7 +23,7 @@ static inline int ui__init(void)
static inline void ui__exit(bool wait_for_ok __maybe_unused) {}
#endif
-#ifdef GTK2_SUPPORT
+#ifdef HAVE_GTK2_SUPPORT
int perf_gtk__init(void);
void perf_gtk__exit(bool wait_for_ok);
#else
Index: tip/tools/perf/util/annotate.h
===================================================================
--- tip.orig/tools/perf/util/annotate.h
+++ tip/tools/perf/util/annotate.h
@@ -150,7 +150,7 @@ int symbol__tty_annotate(struct symbol *
struct perf_evsel *evsel, bool print_lines,
bool full_paths, int min_pcnt, int max_lines);
-#ifdef SLANG_SUPPORT
+#ifdef HAVE_SLANG_SUPPORT
int symbol__tui_annotate(struct symbol *sym, struct map *map,
struct perf_evsel *evsel,
struct hist_browser_timer *hbt);
@@ -165,7 +165,7 @@ static inline int symbol__tui_annotate(s
}
#endif
-#ifdef GTK2_SUPPORT
+#ifdef HAVE_GTK2_SUPPORT
int symbol__gtk_annotate(struct symbol *sym, struct map *map,
struct perf_evsel *evsel,
struct hist_browser_timer *hbt);
Index: tip/tools/perf/util/cache.h
===================================================================
--- tip.orig/tools/perf/util/cache.h
+++ tip/tools/perf/util/cache.h
@@ -70,7 +70,7 @@ extern char *perf_path(const char *fmt,
extern char *perf_pathdup(const char *fmt, ...)
__attribute__((format (printf, 1, 2)));
-#ifndef HAVE_STRLCPY
+#ifndef HAVE_STRLCPY_SUPPORT
extern size_t strlcpy(char *dest, const char *src, size_t size);
#endif
Index: tip/tools/perf/util/generate-cmdlist.sh
===================================================================
--- tip.orig/tools/perf/util/generate-cmdlist.sh
+++ tip/tools/perf/util/generate-cmdlist.sh
@@ -22,7 +22,7 @@ do
}' "Documentation/perf-$cmd.txt"
done
-echo "#ifdef LIBELF_SUPPORT"
+echo "#ifdef HAVE_LIBELF_SUPPORT"
sed -n -e 's/^perf-\([^ ]*\)[ ].* full.*/\1/p' command-list.txt |
sort |
while read cmd
@@ -35,5 +35,5 @@ do
p
}' "Documentation/perf-$cmd.txt"
done
-echo "#endif /* LIBELF_SUPPORT */"
+echo "#endif /* HAVE_LIBELF_SUPPORT */"
echo "};"
Index: tip/tools/perf/util/hist.h
===================================================================
--- tip.orig/tools/perf/util/hist.h
+++ tip/tools/perf/util/hist.h
@@ -183,7 +183,7 @@ struct hist_browser_timer {
int refresh;
};
-#ifdef SLANG_SUPPORT
+#ifdef HAVE_SLANG_SUPPORT
#include "../ui/keysyms.h"
int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel,
struct hist_browser_timer *hbt);
@@ -224,7 +224,7 @@ static inline int script_browse(const ch
#define K_SWITCH_INPUT_DATA -3000
#endif
-#ifdef GTK2_SUPPORT
+#ifdef HAVE_GTK2_SUPPORT
int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist, const char *help,
struct hist_browser_timer *hbt __maybe_unused,
float min_pcnt);
Index: tip/tools/perf/util/include/dwarf-regs.h
===================================================================
--- tip.orig/tools/perf/util/include/dwarf-regs.h
+++ tip/tools/perf/util/include/dwarf-regs.h
@@ -1,7 +1,7 @@
#ifndef _PERF_DWARF_REGS_H_
#define _PERF_DWARF_REGS_H_
-#ifdef DWARF_SUPPORT
+#ifdef HAVE_DWARF_SUPPORT
const char *get_arch_regstr(unsigned int n);
#endif
Index: tip/tools/perf/util/map.c
===================================================================
--- tip.orig/tools/perf/util/map.c
+++ tip/tools/perf/util/map.c
@@ -172,7 +172,7 @@ int map__load(struct map *map, symbol_fi
pr_warning(", continuing without symbols\n");
return -1;
} else if (nr == 0) {
-#ifdef LIBELF_SUPPORT
+#ifdef HAVE_LIBELF_SUPPORT
const size_t len = strlen(name);
const size_t real_len = len - sizeof(DSO__DELETED);
Index: tip/tools/perf/util/path.c
===================================================================
--- tip.orig/tools/perf/util/path.c
+++ tip/tools/perf/util/path.c
@@ -22,7 +22,7 @@ static const char *get_perf_dir(void)
return ".";
}
-#ifndef HAVE_STRLCPY
+#ifndef HAVE_STRLCPY_SUPPORT
size_t strlcpy(char *dest, const char *src, size_t size)
{
size_t ret = strlen(src);
Index: tip/tools/perf/util/perf_regs.h
===================================================================
--- tip.orig/tools/perf/util/perf_regs.h
+++ tip/tools/perf/util/perf_regs.h
@@ -1,7 +1,7 @@
#ifndef __PERF_REGS_H
#define __PERF_REGS_H
-#ifdef HAVE_PERF_REGS
+#ifdef HAVE_PERF_REGS_SUPPORT
#include <perf_regs.h>
#else
#define PERF_REGS_MASK 0
@@ -10,5 +10,5 @@ static inline const char *perf_reg_name(
{
return NULL;
}
-#endif /* HAVE_PERF_REGS */
+#endif /* HAVE_PERF_REGS_SUPPORT */
#endif /* __PERF_REGS_H */
Index: tip/tools/perf/util/probe-event.c
===================================================================
--- tip.orig/tools/perf/util/probe-event.c
+++ tip/tools/perf/util/probe-event.c
@@ -201,7 +201,7 @@ static int convert_to_perf_probe_point(s
return 0;
}
-#ifdef DWARF_SUPPORT
+#ifdef HAVE_DWARF_SUPPORT
/* Open new debuginfo of given module */
static struct debuginfo *open_debuginfo(const char *module)
{
@@ -630,7 +630,7 @@ int show_available_vars(struct perf_prob
return ret;
}
-#else /* !DWARF_SUPPORT */
+#else /* !HAVE_DWARF_SUPPORT */
static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp,
struct perf_probe_point *pp)
Index: tip/tools/perf/util/probe-finder.h
===================================================================
--- tip.orig/tools/perf/util/probe-finder.h
+++ tip/tools/perf/util/probe-finder.h
@@ -14,7 +14,7 @@ static inline int is_c_varname(const cha
return isalpha(name[0]) || name[0] == '_';
}
-#ifdef DWARF_SUPPORT
+#ifdef HAVE_DWARF_SUPPORT
#include "dwarf-aux.h"
@@ -105,6 +105,6 @@ struct line_finder {
int found;
};
-#endif /* DWARF_SUPPORT */
+#endif /* HAVE_DWARF_SUPPORT */
#endif /*_PROBE_FINDER_H */
Index: tip/tools/perf/util/symbol-elf.c
===================================================================
--- tip.orig/tools/perf/util/symbol-elf.c
+++ tip/tools/perf/util/symbol-elf.c
@@ -8,7 +8,7 @@
#include "symbol.h"
#include "debug.h"
-#ifndef HAVE_ELF_GETPHDRNUM
+#ifndef HAVE_ELF_GETPHDRNUM_SUPPORT
static int elf_getphdrnum(Elf *elf, size_t *dst)
{
GElf_Ehdr gehdr;
Index: tip/tools/perf/util/symbol.h
===================================================================
--- tip.orig/tools/perf/util/symbol.h
+++ tip/tools/perf/util/symbol.h
@@ -13,7 +13,7 @@
#include <libgen.h>
#include "build-id.h"
-#ifdef LIBELF_SUPPORT
+#ifdef HAVE_LIBELF_SUPPORT
#include <libelf.h>
#include <gelf.h>
#endif
@@ -21,7 +21,7 @@
#include "dso.h"
-#ifdef HAVE_CPLUS_DEMANGLE
+#ifdef HAVE_CPLUS_DEMANGLE_SUPPORT
extern char *cplus_demangle(const char *, int);
static inline char *bfd_demangle(void __maybe_unused *v, const char *c, int i)
@@ -46,7 +46,7 @@ static inline char *bfd_demangle(void __
* libelf 0.8.x and earlier do not support ELF_C_READ_MMAP;
* for newer versions we can use mmap to reduce memory usage:
*/
-#ifdef LIBELF_MMAP
+#ifdef HAVE_LIBELF_MMAP_SUPPORT
# define PERF_ELF_C_READ_MMAP ELF_C_READ_MMAP
#else
# define PERF_ELF_C_READ_MMAP ELF_C_READ
@@ -178,7 +178,7 @@ struct symsrc {
int fd;
enum dso_binary_type type;
-#ifdef LIBELF_SUPPORT
+#ifdef HAVE_LIBELF_SUPPORT
Elf *elf;
GElf_Ehdr ehdr;
Index: tip/tools/perf/util/unwind.h
===================================================================
--- tip.orig/tools/perf/util/unwind.h
+++ tip/tools/perf/util/unwind.h
@@ -13,7 +13,7 @@ struct unwind_entry {
typedef int (*unwind_entry_cb_t)(struct unwind_entry *entry, void *arg);
-#ifdef LIBUNWIND_SUPPORT
+#ifdef HAVE_LIBUNWIND_SUPPORT
int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
struct machine *machine,
struct thread *thread,
@@ -31,5 +31,5 @@ unwind__get_entries(unwind_entry_cb_t cb
{
return 0;
}
-#endif /* LIBUNWIND_SUPPORT */
+#endif /* HAVE_LIBUNWIND_SUPPORT */
#endif /* __UNWIND_H */
Index: tip/tools/perf/util/util.c
===================================================================
--- tip.orig/tools/perf/util/util.c
+++ tip/tools/perf/util/util.c
@@ -1,7 +1,7 @@
#include "../perf.h"
#include "util.h"
#include <sys/mman.h>
-#ifdef BACKTRACE_SUPPORT
+#ifdef HAVE_BACKTRACE_SUPPORT
#include <execinfo.h>
#endif
#include <stdio.h>
@@ -204,7 +204,7 @@ int hex2u64(const char *ptr, u64 *long_v
}
/* Obtain a backtrace and print it to stdout. */
-#ifdef BACKTRACE_SUPPORT
+#ifdef HAVE_BACKTRACE_SUPPORT
void dump_stack(void)
{
void *array[16];
Em Mon, Sep 30, 2013 at 06:42:10PM +0200, Ingo Molnar escreveu:
>
> This series (with combo patch attached) implements (much) faster
> perf-tools feature-auto-detection.
Cool stuff!
Testing:
[acme@zoo linux]$ cat /etc/fedora-release
Fedora release 18 (Spherical Cow)
[acme@zoo linux]$ time make -j4 -C tools/perf O=/tmp/build/perf install-bin
Auto-detecting system features:
... stackprotector-all: [ on ]
... volatile-register-var: [ on ]
... fortify-source: [ on ]
... libelf: [ on ]
... libelf-mmap: [ on ]
... glibc: [ on ]
... dwarf: [ on ]
... libelf-getphdrnum: [ on ]
... libunwind: [ on ]
... libaudit: [ on ]
... libslang: [ on ]
... gtk2: [ on ]
... gtk2-infobar: [ on ]
... libperl: [ on ]
... libpython: [ on ]
... libpython-version: [ on ]
... libbfd: [ on ]
... strlcpy: [ OFF ]
... on-exit: [ on ]
... backtrace: [ on ]
... libnuma: [ on ]
make: Entering directory `/home/git/linux/tools/perf'
MKDIR /tmp/build/perf/arch/
Checking why that strlcpy failed...
- Arnaldo
Em Mon, Sep 30, 2013 at 02:12:20PM -0300, Arnaldo Carvalho de Melo escreveu:
> Em Mon, Sep 30, 2013 at 06:42:10PM +0200, Ingo Molnar escreveu:
> >
> > This series (with combo patch attached) implements (much) faster
> > perf-tools feature-auto-detection.
>
> Cool stuff!
>
> Testing:
>
> [acme@zoo linux]$ cat /etc/fedora-release
> Fedora release 18 (Spherical Cow)
Before:
Performance counter stats for 'make -j4 -C tools/perf O=/tmp/build/perf' (3 runs):
7.336208823 seconds time elapsed ( +- 5.75% )
After:
3.831295406 seconds time elapsed ( +- 1.20% )
- Arnaldo
Em Mon, Sep 30, 2013 at 02:27:41PM -0300, Arnaldo Carvalho de Melo escreveu:
> Em Mon, Sep 30, 2013 at 02:12:20PM -0300, Arnaldo Carvalho de Melo escreveu:
> > Em Mon, Sep 30, 2013 at 06:42:10PM +0200, Ingo Molnar escreveu:
> > > This series (with combo patch attached) implements (much) faster
> > > perf-tools feature-auto-detection.
> > Cool stuff!
> > Testing:
Removed python-devel and it correctly turned those off:
... libpython: [ OFF ]
... libpython-version: [ OFF ]
One possible way to speed this up a bit more would be to somehow state that if
the test A fails, then don't even bother testing test B, like in the case above.
- Arnaldo
On Mon, Sep 30, 2013 at 10:12 AM, Arnaldo Carvalho de Melo
<[email protected]> wrote:
>
> Checking why that strlcpy failed...
I don't think glibc does strlcpy. It's not a standard C function, and
it's somewhat controversial (although I dislike strncpy more with the
crazy zero-padding, ugh).
Linus
Em Mon, Sep 30, 2013 at 02:30:52PM -0300, Arnaldo Carvalho de Melo escreveu:
> Em Mon, Sep 30, 2013 at 02:27:41PM -0300, Arnaldo Carvalho de Melo escreveu:
> > Em Mon, Sep 30, 2013 at 02:12:20PM -0300, Arnaldo Carvalho de Melo escreveu:
> > > Em Mon, Sep 30, 2013 at 06:42:10PM +0200, Ingo Molnar escreveu:
> > > > This series (with combo patch attached) implements (much) faster
> > > > perf-tools feature-auto-detection.
> Removed python-devel and it correctly turned those off:
>
> ... libpython: [ OFF ]
> ... libpython-version: [ OFF ]
>
> One possible way to speed this up a bit more would be to somehow state that if
> the test A fails, then don't even bother testing test B, like in the case above.
Another suggestion: To provide a short sentence with each feature
stating what will not be present when something is "OFF", or even a
sentence stating what the feature is about, so the user gets a better
picture of what is (not) being built into his tool.
Probably reusing strings we already have in the makefiles.
- Arnaldo
Em Mon, Sep 30, 2013 at 02:36:27PM -0300, Arnaldo Carvalho de Melo escreveu:
> Another suggestion: To provide a short sentence with each feature
> stating what will be not present when something is "OFF", or even a
> sentence stating what the feature is about, so the user get a better
> picture of what is (not) being built into his tool.
>
> Probably reusing strings we already have in the makefiles.
And also reusing existing strings to tell the user which devel packages
should be installed to get that feature built/enabled.
- Arnaldo
Em Mon, Sep 30, 2013 at 02:39:14PM -0300, Arnaldo Carvalho de Melo escreveu:
> Em Mon, Sep 30, 2013 at 02:36:27PM -0300, Arnaldo Carvalho de Melo escreveu:
> > Another suggestion: To provide a short sentence with each feature
> > stating what will be not present when something is "OFF", or even a
> > sentence stating what the feature is about, so the user get a better
> > picture of what is (not) being built into his tool.
> >
> > Probably reusing strings we already have in the makefiles.
>
> And also reusing existing strings to tell the user which devel packages
> should be installed to get that feature built/enabled.
config/Makefile:187: No libelf found, disables 'probe' tool, please install elfutils-libelf-devel/libelf-dev
config/Makefile:251: No libunwind found, disabling post unwind support. Please install libunwind-dev[el] >= 0.99
config/Makefile:279: slang not found, disables TUI support. Please install slang-devel or libslang-dev
config/Makefile:292: GTK2 not found, disables GTK2 support. Please install gtk2-devel or libgtk2.0-dev
config/Makefile:346: The path '/usr/bin/python-config' is not executable.
config/Makefile:350: No python-config tool was found
config/Makefile:350: Python support will not be built
config/Makefile:444: No numa.h found, disables 'perf bench numa mem' benchmark, please install numa-libs-devel or libnuma-dev
My bad, it's still there, sorry for the noise 8-)
I'm testing removing devel packages, one by one, removing gtk2-devel provides
the biggest reduction in build time, no surprises :)
- Arnaldo
Em Mon, Sep 30, 2013 at 10:34:27AM -0700, Linus Torvalds escreveu:
> On Mon, Sep 30, 2013 at 10:12 AM, Arnaldo Carvalho de Melo
> <[email protected]> wrote:
> >
> > Checking why that strlcpy failed...
>
> I don't think glibc does strlcpy. It's not a standard C function, and
My concern was more about the thinking: ``Is this red "OFF" thing a
problem? I feel so much more comfortable when all entries have nice
green "on" lights...''
> it's somewhat controversial (although I dislike strncpy more with the
> crazy zero-padding, ugh).
:-)
- Arnaldo
Em Mon, Sep 30, 2013 at 02:46:39PM -0300, Arnaldo Carvalho de Melo escreveu:
> I'm testing removing devel packages, one by one, removing gtk2-devel provides
> the biggest reduction in build time, no surprises :)
And this exercise spotted a bug:
make[1]: Leaving directory `/home/git/linux/tools/lib/lk'
GEN /tmp/build/perf/python/perf.so
Context.xs:22:20: fatal error: EXTERN.h: No such file or directory
compilation terminated.
make: *** [/tmp/build/perf/scripts/perl/Perf-Trace-Util/Context.o] Error 1
make: *** Waiting for unfinished jobs....
util/scripting-engines/trace-event-perl.c:29:20: fatal error: EXTERN.h: No such file or directory
compilation terminated.
make: *** [/tmp/build/perf/util/scripting-engines/trace-event-perl.o] Error 1
make: Leaving directory `/home/git/linux/tools/perf'
Performance counter stats for 'make -j4 -C tools/perf O=/tmp/build/perf' (3 runs):
6.453914638 seconds time elapsed ( +- 80.66% )
[acme@zoo linux]$
[acme@zoo linux]$
I.e. I did a 'yum remove python-devel', that removed some more packages that
require this package, one of them is needed for perf to build, and this is
not stated on those:
config/Makefile:444: No numa.h found, disables 'perf bench numa mem' benchmark,
please install numa-libs-devel or libnuma-dev
In fact this is why I thought those messages had been missing in your
patchset, they don't appear when I remove perl-devel, i.e. 'perl' support
is correctly detected as not possible to build due to missing deps, but
no message is emitted for this case.
And the bug is that the feature test for perl support passes, but since it
doesn't try to use EXTERN.h, like something that is enabled when the feature
test passes (util/scripting-engines/trace-event-perl.c), the build fails.
Notebook battery dying, will continue later...
- Arnaldo
* Arnaldo Carvalho de Melo <[email protected]> wrote:
> Em Mon, Sep 30, 2013 at 10:34:27AM -0700, Linus Torvalds escreveu:
> > On Mon, Sep 30, 2013 at 10:12 AM, Arnaldo Carvalho de Melo
> > <[email protected]> wrote:
> > >
> > > Checking why that strlcpy failed...
> >
> > I don't think glibc does strlcpy. It's not a standard C function, and
>
> My concern was more about the thinking: ``Is this red "OFF" thing a
> problem? I feel so much more comfortable when all entries have nice
> green "on" lights...''
Yeah, so I think we should add our internal implementation of strlcpy() as
a __weak function instead - if the libc does not provide then we provide a
fallback.
That should get rid of another ~50 msecs of build overhead, as failed
feature tests are the most expensive ones.
Agreed?
Thanks,
Ingo
* Arnaldo Carvalho de Melo <[email protected]> wrote:
> Em Mon, Sep 30, 2013 at 02:27:41PM -0300, Arnaldo Carvalho de Melo escreveu:
> > Em Mon, Sep 30, 2013 at 02:12:20PM -0300, Arnaldo Carvalho de Melo escreveu:
> > > Em Mon, Sep 30, 2013 at 06:42:10PM +0200, Ingo Molnar escreveu:
> > > > This series (with combo patch attached) implements (much) faster
> > > > perf-tools feature-auto-detection.
>
> > > Cool stuff!
>
> > > Testing:
>
>
> Removed python-devel and it correctly turned those off:
>
> ... libpython: [ OFF ]
> ... libpython-version: [ OFF ]
>
> One possible way to speed this up a bit more would be to somehow state
> that if the test A fails, then don't even bother testing test B, like in
> the case above.
Yeah, that's possible in a way, I did it in another case, see how the
'bionic' test for Android libc gets executed separately (but still
unconditionally - we should probably only execute it if the libc test
fails).
The 'FEATURE_TESTS' variable holds the 'flat' list of features that we
want to definitely test.
Also, if glibc test passes then perhaps the strlcpy() test does not have
to be executed at all.
In the initial version I wanted to maintain the existing feature
dependency.
Thanks,
Ingo
* Arnaldo Carvalho de Melo <[email protected]> wrote:
> In fact this is why I thought those messages had been missing in your
> patchset, they don't appear when I remove perl-devel, i.e. 'perl'
> support is correctly detected as not possible to build due to missing
> deps, but no message is emitted for this case.
>
> And the bug is that the feature test for perl support passes, but since
> it doesn't try to use EXTERN.h, like something that is enabled when the
> feature test passes (util/scripting-engines/trace-event-perl.c), the
> build fails.
Hm, I probably messed up the lib-perl testcase - it was one of the more
complex ones. I assumed that this:
... libperl: [ OFF ]
meant that I didn't have the dependencies installed - but it's the
testcase that is wrong most likely.
Thanks,
Ingo
On Mon, Sep 30, 2013 at 7:34 PM, Linus Torvalds
<[email protected]> wrote:
> On Mon, Sep 30, 2013 at 10:12 AM, Arnaldo Carvalho de Melo
> <[email protected]> wrote:
>>
>> Checking why that strlcpy failed...
>
> I don't think glibc does strlcpy. It's not a standard C function, and
> it's somewhat controversial (although I dislike strncpy more with the
> crazy zero-padding, ugh).
Indeed, e.g. on Ubuntu it's in libbsd, so you also need to #include
<bsd/string.h> and link with -lbsd.
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- [email protected]
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
* Geert Uytterhoeven <[email protected]> wrote:
> On Mon, Sep 30, 2013 at 7:34 PM, Linus Torvalds
> <[email protected]> wrote:
> > On Mon, Sep 30, 2013 at 10:12 AM, Arnaldo Carvalho de Melo
> > <[email protected]> wrote:
> >>
> >> Checking why that strlcpy failed...
> >
> > I don't think glibc does strlcpy. It's not a standard C function, and
> > it's somewhat controversial (although I dislike strncpy more with the
> > crazy zero-padding, ugh).
>
> Indeed, e.g. on Ubuntu it's in libbsd, so you also need to #include
> <bsd/string.h> and link with -lbsd.
Well, the point of the feature test is not to pick up an external
definition of strlcpy(), but to keep working on systems where strlcpy() has
been patched into _glibc_, where we cannot avoid linking to it. (Arch
Linux is such a distribution for example.)
So it's a quirk in essence - and I think marking our implementation as
__weak ought to solve that.
Thanks,
Ingo
* Ingo Molnar <[email protected]> wrote:
> > > > Checking why that strlcpy failed...
> > >
> > > I don't think glibc does strlcpy. It's not a standard C function,
> > > and
> >
> > My concern was more about the thinking: ``Is this red "OFF" thing a
> > problem? I feel so much more comfortable when all entries have nice
> > green "on" lights...''
>
> Yeah, so I think we should add our internal implementation of strlcpy()
> as a __weak function instead - if the libc does not provide then we
> provide a fallback.
>
> That should get rid of another ~50 msecs of build overhead, as failed
> feature tests are the most expensive ones.
The patch below implements that. I haven't actually tested it on a system
with an in-libc strlcpy implementation, but it Should Just Work (tm) ;-)
Overhead is down from 0.600 secs to 0.540 secs. The only remaining thing
is the libperl bug, I'll have a look at that next.
( I also couldn't resist fixing up perf's version of compiler.h a bit,
will split that out into a separate patch later on. )
Thanks,
Ingo
=====================>
Subject: perf autodep: Remove strlcpy feature check, add __weak strlcpy implementation
From: Ingo Molnar <[email protected]>
Date: Tue Oct 1 13:26:13 CEST 2013
---
tools/perf/config/Makefile | 8 +-------
tools/perf/config/feature-checks/Makefile | 3 ---
tools/perf/config/feature-checks/test-strlcpy.c | 8 --------
tools/perf/util/cache.h | 3 +--
tools/perf/util/include/linux/compiler.h | 19 ++++++++++++++-----
tools/perf/util/path.c | 10 +++++++---
6 files changed, 23 insertions(+), 28 deletions(-)
Index: tip/tools/perf/config/Makefile
===================================================================
--- tip.orig/tools/perf/config/Makefile
+++ tip/tools/perf/config/Makefile
@@ -101,7 +101,7 @@ $(info )
$(info Auto-detecting system features:)
$(shell make -i -j -C config/feature-checks >/dev/null 2>&1)
-FEATURE_TESTS = stackprotector-all volatile-register-var fortify-source libelf libelf-mmap glibc dwarf libelf-getphdrnum libunwind libaudit libslang gtk2 gtk2-infobar libperl libpython libpython-version libbfd strlcpy on-exit backtrace libnuma
+FEATURE_TESTS = stackprotector-all volatile-register-var fortify-source libelf libelf-mmap glibc dwarf libelf-getphdrnum libunwind libaudit libslang gtk2 gtk2-infobar libperl libpython libpython-version libbfd on-exit backtrace libnuma
$(foreach feat,$(FEATURE_TESTS),$(call feature_check,$(feat)))
@@ -421,12 +421,6 @@ else
endif
endif
-ifndef NO_STRLCPY
- ifeq ($(feature-strlcpy), 1)
- CFLAGS += -DHAVE_STRLCPY_SUPPORT
- endif
-endif
-
ifndef NO_ON_EXIT
ifeq ($(feature-on-exit), 1)
CFLAGS += -DHAVE_ON_EXIT_SUPPORT
Index: tip/tools/perf/config/feature-checks/Makefile
===================================================================
--- tip.orig/tools/perf/config/feature-checks/Makefile
+++ tip/tools/perf/config/feature-checks/Makefile
@@ -93,9 +93,6 @@ test-libpython-version: test-libpython-v
test-libbfd: test-libbfd.c
$(CC) -o $@ [email protected] -DPACKAGE='perf' -DPACKAGE=perf -lbfd -ldl
-test-strlcpy: test-strlcpy.c
- $(CC) -o $@ [email protected]
-
test-on-exit: test-on-exit.c
$(CC) -o $@ [email protected]
Index: tip/tools/perf/config/feature-checks/test-strlcpy.c
===================================================================
--- tip.orig/tools/perf/config/feature-checks/test-strlcpy.c
+++ /dev/null
@@ -1,8 +0,0 @@
-#include <stdlib.h>
-extern size_t strlcpy(char *dest, const char *src, size_t size);
-
-int main(void)
-{
- strlcpy(NULL, NULL, 0);
- return 0;
-}
Index: tip/tools/perf/util/cache.h
===================================================================
--- tip.orig/tools/perf/util/cache.h
+++ tip/tools/perf/util/cache.h
@@ -70,8 +70,7 @@ extern char *perf_path(const char *fmt,
extern char *perf_pathdup(const char *fmt, ...)
__attribute__((format (printf, 1, 2)));
-#ifndef HAVE_STRLCPY_SUPPORT
+/* Matches the libc/libbsd function attribute so we declare this unconditionally: */
extern size_t strlcpy(char *dest, const char *src, size_t size);
-#endif
#endif /* __PERF_CACHE_H */
Index: tip/tools/perf/util/include/linux/compiler.h
===================================================================
--- tip.orig/tools/perf/util/include/linux/compiler.h
+++ tip/tools/perf/util/include/linux/compiler.h
@@ -2,20 +2,29 @@
#define _PERF_LINUX_COMPILER_H_
#ifndef __always_inline
-#define __always_inline inline
+# define __always_inline inline __attribute__((always_inline))
#endif
+
#define __user
+
#ifndef __attribute_const__
-#define __attribute_const__
+# define __attribute_const__
#endif
#ifndef __maybe_unused
-#define __maybe_unused __attribute__((unused))
+# define __maybe_unused __attribute__((unused))
+#endif
+
+#ifndef __packed
+# define __packed __attribute__((__packed__))
#endif
-#define __packed __attribute__((__packed__))
#ifndef __force
-#define __force
+# define __force
+#endif
+
+#ifndef __weak
+# define __weak __attribute__((weak))
#endif
#endif
Index: tip/tools/perf/util/path.c
===================================================================
--- tip.orig/tools/perf/util/path.c
+++ tip/tools/perf/util/path.c
@@ -22,19 +22,23 @@ static const char *get_perf_dir(void)
return ".";
}
-#ifndef HAVE_STRLCPY_SUPPORT
-size_t strlcpy(char *dest, const char *src, size_t size)
+/*
+ * If libc has strlcpy() then that version will override this
+ * implementation:
+ */
+size_t __weak strlcpy(char *dest, const char *src, size_t size)
{
size_t ret = strlen(src);
if (size) {
size_t len = (ret >= size) ? size - 1 : ret;
+
memcpy(dest, src, len);
dest[len] = '\0';
}
+
return ret;
}
-#endif
static char *get_pathname(void)
{
* Ingo Molnar <[email protected]> wrote:
> Overhead is down from 0.600 secs to 0.540 secs. The only remaining thing
> is the libperl bug, I'll have a look at that next.
So, libperl detection works fine here, once I've installed the prereq
package on Fedora, "perl-ExtUtils-Embed":
comet:~/tip/tools/perf> make Makefile
Auto-detecting system features:
... stackprotector-all: [ on ]
... volatile-register-var: [ on ]
... fortify-source: [ on ]
... libelf: [ on ]
... libelf-mmap: [ on ]
... glibc: [ on ]
... dwarf: [ on ]
... libelf-getphdrnum: [ on ]
... libunwind: [ on ]
... libaudit: [ on ]
... libslang: [ on ]
... gtk2: [ on ]
... gtk2-infobar: [ on ]
... libperl: [ on ]
... libpython: [ on ]
... libpython-version: [ on ]
... libbfd: [ on ]
... on-exit: [ on ]
... backtrace: [ on ]
... libnuma: [ on ]
Time is down to 0.480 sec because there are no build failures now, only
Make re-checking the dependencies of already built binaries.
And the actual feature check is roughly 0.330 secs of that:
comet:~/tip/tools/perf/config/feature-checks> time ( make -j >/dev/null; \
for N in stackprotector-all volatile-register-var fortify-source libelf \
libelf-mmap glibc dwarf libelf-getphdrnum libunwind libaudit libslang gtk2 \
gtk2-infobar libperl libpython libpython-version libbfd on-exit backtrace \
libnuma; do make test-$N >/dev/null; done )
real 0m0.330s
user 0m0.290s
sys 0m0.031s
With 0.150 secs spent elsewhere.
So there's more speedups possible I think, for example we could construct
an 'optimistic' testcase that is generated live and includes a
concatenation of all the testcases.
If the build of that file succeeds then we have a really efficient
fast-path both in the first-build and in the repeat-build case.
If that build fails then we do the more finegrained feature check.
Thoughts?
Thanks,
Ingo
Em Tue, Oct 01, 2013 at 02:04:55PM +0200, Ingo Molnar escreveu:
> So there's more speedups possible I think, for example we could construct
> an 'optimistic' testcase that is generated live and includes a
> concatenation of all the testcases.
>
> If the build of that file succeeds then we have a really efficient
> fast-path both in the first-build and in the repeat-build case.
>
> If that build fails then we do the more finegrained feature check.
>
> Thoughts?
Lets get what you have merged and continue from there ;-)
- Arnaldo
* Ingo Molnar <[email protected]> wrote:
> And the actual feature check is roughly 0.330 secs of that:
>
> comet:~/tip/tools/perf/config/feature-checks> time ( make -j >/dev/null; \
> for N in stackprotector-all volatile-register-var fortify-source libelf \
> libelf-mmap glibc dwarf libelf-getphdrnum libunwind libaudit libslang gtk2 \
> gtk2-infobar libperl libpython libpython-version libbfd on-exit backtrace \
> libnuma; do make test-$N >/dev/null; done )
>
> real 0m0.330s
> user 0m0.290s
> sys 0m0.031s
>
> With 0.150 secs spent elsewhere.
>
> So there's more speedups possible I think, for example we could
> construct an 'optimistic' testcase that is generated live and includes a
> concatenation of all the testcases.
>
> If the build of that file succeeds then we have a really efficient
> fast-path both in the first-build and in the repeat-build case.
>
> If that build fails then we do the more finegrained feature check.
>
> Thoughts?
So, something like the patch below. It contains test-all.c "testcase from
hell" which will succeed on a well configured system.
With this final trick I got a ridiculous speedup in auto-detection speed,
for the rebuild-again case:
comet:~/tip/tools/perf> perf stat --null --repeat 5 make Makefile
Auto-detecting system features:
... stackprotector-all: [ on ]
... volatile-register-var: [ on ]
... fortify-source: [ on ]
... libelf: [ on ]
... libelf-mmap: [ on ]
... glibc: [ on ]
... dwarf: [ on ]
... libelf-getphdrnum: [ on ]
... libunwind: [ on ]
... libaudit: [ on ]
... libslang: [ on ]
... gtk2: [ on ]
... gtk2-infobar: [ on ]
... libperl: [ on ]
... libpython: [ on ]
... libpython-version: [ on ]
... libbfd: [ on ]
... on-exit: [ on ]
... backtrace: [ on ]
... libnuma: [ on ]
make: Nothing to be done for `Makefile'.
Performance counter stats for 'make Makefile' (5 runs):
0.183843005 seconds time elapsed ( +- 0.21% )
So the cached build time is down from 3.300 secs to 0.183 secs, an 18-fold
speedup.
It's still full auto-detection, because the -MD dependency generation
works for test-all.c as well. For example once I remove the
'perl-ExtUtils-Embed' package, I immediately get the correct, adapted
auto-dep output with the 'libperl' testcase showing 'OFF':
comet:~/tip/tools/perf> make Makefile
Auto-detecting system features:
... stackprotector-all: [ on ]
... volatile-register-var: [ on ]
... fortify-source: [ on ]
... libelf: [ on ]
... libelf-mmap: [ on ]
... glibc: [ on ]
... dwarf: [ on ]
... libelf-getphdrnum: [ on ]
... libunwind: [ on ]
... libaudit: [ on ]
... libslang: [ on ]
... gtk2: [ on ]
... gtk2-infobar: [ on ]
... libperl: [ OFF ]
... libpython: [ on ]
... libpython-version: [ on ]
... libbfd: [ on ]
... on-exit: [ on ]
... backtrace: [ on ]
... libnuma: [ on ]
Note that at this point this is just a proof-of-concept patch for
performance testing, nothing mergable yet.
In particular I'm not happy yet with the current construction of
test-all.c and its build method: it's the result of concatenation of files
and build arguments plus further massaging.
That should probably be automated in some fashion, to make it easier to
add new testcases and to make the whole construct more maintainable.
Thanks,
Ingo
=========================>
Subject: perf autodep: Speed up the 'all features are present' case
From: Ingo Molnar <[email protected]>
Date: Tue Oct 1 14:14:31 CEST 2013
---
tools/perf/config/Makefile | 18 ++
tools/perf/config/feature-checks/Makefile | 3
tools/perf/config/feature-checks/test-all.c | 196 ++++++++++++++++++++++++++++
3 files changed, 215 insertions(+), 2 deletions(-)
Index: tip/tools/perf/config/Makefile
===================================================================
--- tip.orig/tools/perf/config/Makefile
+++ tip/tools/perf/config/Makefile
@@ -94,16 +94,30 @@ define feature_check_code
feature-$(1) := $(shell make -C config/feature-checks test-$1 >/dev/null 2>/dev/null && echo 1 || echo 0)
endef
+feature_set = $(eval $(feature_set_code))
+define feature_set_code
+ feature-$(1) := 1
+endef
+
#
# Build the feature check binaries in parallel, ignore errors, ignore return value and suppress output:
#
$(info )
$(info Auto-detecting system features:)
-$(shell make -i -j -C config/feature-checks >/dev/null 2>&1)
FEATURE_TESTS = stackprotector-all volatile-register-var fortify-source libelf libelf-mmap glibc dwarf libelf-getphdrnum libunwind libaudit libslang gtk2 gtk2-infobar libperl libpython libpython-version libbfd on-exit backtrace libnuma
-$(foreach feat,$(FEATURE_TESTS),$(call feature_check,$(feat)))
+#
+# Special fast-path for the 'all features are available' case:
+#
+$(call feature_check,all)
+
+ifeq ($(feature-all), 1)
+ $(foreach feat,$(FEATURE_TESTS),$(call feature_set,$(feat)))
+else
+ $(shell make -i -j -C config/feature-checks >/dev/null 2>&1)
+ $(foreach feat,$(FEATURE_TESTS),$(call feature_check,$(feat)))
+endif
feature_print = $(eval $(feature_print_code))
Index: tip/tools/perf/config/feature-checks/Makefile
===================================================================
--- tip.orig/tools/perf/config/feature-checks/Makefile
+++ tip/tools/perf/config/feature-checks/Makefile
@@ -99,6 +99,9 @@ test-on-exit: test-on-exit.c
test-backtrace: test-backtrace.c
$(CC) -o $@ [email protected]
+test-all:
+ $(CC) -o $@ [email protected] -Werror -fstack-protector -Wvolatile-register-var -O2 -Werror -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lunwind -lunwind-x86_64 -lelf -laudit -I/usr/include/slang -lslang $(shell pkg-config --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='perf' -DPACKAGE=perf -lbfd -ldl
+
-include *.d */*.d
###############################
Index: tip/tools/perf/config/feature-checks/test-all.c
===================================================================
--- /dev/null
+++ tip/tools/perf/config/feature-checks/test-all.c
@@ -0,0 +1,196 @@
+
+#pragma GCC diagnostic ignored "-Wstrict-prototypes"
+
+#include <Python.h>
+
+#include <EXTERN.h>
+#include <perl.h>
+
+#include <stdio.h>
+#include <libelf.h>
+#include <gnu/libc-version.h>
+#include <dwarf.h>
+#include <elfutils/libdw.h>
+#include <elfutils/version.h>
+#include <libelf.h>
+#include <libunwind.h>
+#include <stdlib.h>
+#include <libaudit.h>
+#include <slang.h>
+#include <gtk/gtk.h>
+#include <bfd.h>
+#include <stdio.h>
+#include <execinfo.h>
+#include <stdio.h>
+#include <numa.h>
+#include <numaif.h>
+
+#pragma GCC diagnostic error "-Wstrict-prototypes"
+
+int main1(void)
+{
+ return puts("hi");
+}
+
+int main2(void)
+{
+ return puts("hi");
+}
+
+int main3(void)
+{
+ return puts("hi");
+}
+
+int main4(void)
+{
+ Elf *elf = elf_begin(0, ELF_C_READ, 0);
+ return (long)elf;
+}
+#
+int main5(void)
+{
+ Elf *elf = elf_begin(0, ELF_C_READ_MMAP, 0);
+ return (long)elf;
+}
+
+int main6(void)
+{
+ const char *version = gnu_get_libc_version();
+ return (long)version;
+}
+
+int main7(void)
+{
+ Dwarf *dbg = dwarf_begin(0, DWARF_C_READ);
+ return (long)dbg;
+}
+
+int main8(void)
+{
+ size_t dst;
+ return elf_getphdrnum(0, &dst);
+}
+
+extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
+ unw_word_t ip,
+ unw_dyn_info_t *di,
+ unw_proc_info_t *pi,
+ int need_unwind_info, void *arg);
+
+
+#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)
+
+int main9(void)
+{
+ unw_addr_space_t addr_space;
+ addr_space = unw_create_addr_space(NULL, 0);
+ unw_init_remote(NULL, addr_space, NULL);
+ dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL);
+ return 0;
+}
+
+int main10(void)
+{
+ printf("error message: %s\n", audit_errno_to_name(0));
+ return audit_open();
+}
+
+int main11(void)
+{
+ return SLsmg_init_smg();
+}
+
+int main12(int argc, char *argv[])
+{
+ gtk_init(&argc, &argv);
+
+ return 0;
+}
+
+int main13(void)
+{
+ gtk_info_bar_new();
+
+ return 0;
+}
+
+int main14(void)
+{
+ perl_alloc();
+
+ return 0;
+}
+
+int main15(void)
+{
+ Py_Initialize();
+ return 0;
+}
+
+#if PY_VERSION_HEX >= 0x03000000
+ #error
+#endif
+
+int main16(void)
+{
+ return 0;
+}
+
+int main17(void)
+{
+ bfd_demangle(0, 0, 0);
+ return 0;
+}
+
+void exit_function(int x, void *y)
+{
+}
+
+int main18(void)
+{
+ return on_exit(exit_function, NULL);
+}
+
+int main19(void)
+{
+ void *backtrace_fns[1];
+ size_t entries;
+
+ entries = backtrace(backtrace_fns, 1);
+ backtrace_symbols(backtrace_fns, entries);
+
+ return 0;
+}
+
+int main20(void)
+{
+ numa_available();
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ main1();
+ main2();
+ main3();
+ main4();
+ main5();
+ main6();
+ main7();
+ main8();
+ main9();
+ main10();
+ main11();
+ main12(argc, argv);
+ main13();
+ main14();
+ main15();
+ main16();
+ main17();
+ main18();
+ main19();
+ main20();
+
+ return 0;
+}
Another build speedup, while at it.
===============>
Subject: perf tools: Speed up git-version test on re-make
From: Ingo Molnar <[email protected]>
Date: Tue Oct 1 16:28:09 CEST 2013
util/PERF-VERSION-GEN is currently executed on every build attempt,
and this script can take a lot of time on trees that are at a
significant git-distance from Linus's tree:
$ time util/PERF-VERSION-GEN
real 0m4.343s
user 0m4.176s
sys 0m0.140s
It also takes a lot of time if the Git repository is network attached, etc.,
because the command it uses:
TAG=$(git describe --abbrev=0 --match "v[0-9].[0-9]*" 2>/dev/null )
has to count commits from the nearest tag and thus has to access (and
decompress) every git commit blob on the relevant version path.
Even on Linus's tree it takes 0.28 seconds on a fast box to count all the
commits and get the git version string:
$ time util/PERF-VERSION-GEN
real 0m0.279s
user 0m0.247s
sys 0m0.025s
But the version string only has to be regenerated if the git repository's
head commit changes. So add a dependency of ../../.git/HEAD and touch
the file every time it's regenerated, so that Make's build rules can
pick it up and cache the result:
make: `PERF-VERSION-FILE' is up to date.
real 0m0.184s
user 0m0.117s
sys 0m0.026s
So the version generation overhead is completely gone for re-builds.
Signed-off-by: Ingo Molnar <[email protected]>
---
tools/perf/Makefile | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)
Index: tip/tools/perf/Makefile
===================================================================
--- tip.orig/tools/perf/Makefile
+++ tip/tools/perf/Makefile
@@ -70,8 +70,9 @@ ifneq ($(OUTPUT),)
#$(info Determined 'OUTPUT' to be $(OUTPUT))
endif
-$(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
+$(OUTPUT)PERF-VERSION-FILE: ../../.git/HEAD
@$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT)
+ @touch $(OUTPUT)PERF-VERSION-FILE
CC = $(CROSS_COMPILE)gcc
AR = $(CROSS_COMPILE)ar
@@ -813,7 +814,17 @@ clean: $(LIBTRACEEVENT)-clean $(LIBLK)-c
$(RM) $(OUTPUT)util/*-flex*
$(python-clean)
+#
+# Trick: if ../../.git does not exist - we are building out of tree for example,
+# then force version regeneration:
+#
+ifeq ($(wildcard ../../.git/HEAD),)
+ GIT-HEAD-PHONY = ../../.git/HEAD
+else
+ GIT-HEAD-PHONY =
+endif
+
.PHONY: all install clean strip $(LIBTRACEEVENT) $(LIBLK)
.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
-.PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS
+.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope .FORCE-PERF-CFLAGS
Ok, this should be my final perf-build speedup patch.
With this patch and all the other patches applied perf delta-builds very
fast now - an empty re-build takes just 0.2 seconds:
comet:~/tip/tools/perf> time make
real 0m0.207s
user 0m0.130s
sys 0m0.034s
and the rebuild after a single .c file was changed is just 1.8 seconds:
comet:~/tip/tools/perf> touch perf.c; time make
real 0m1.892s
user 0m1.495s
sys 0m0.337s
Without the changes this used to be 9.4 seconds:
comet:~/tip/tools/perf> touch perf.c; time make
real 0m9.418s
user 0m8.251s
sys 0m0.996s
which was an eternity! :-)
Thanks,
Ingo
------------------------>
Subject: perf tools: Speed up the final link
From: Ingo Molnar <[email protected]>
Date: Tue Oct 1 17:17:22 CEST 2013
libtraceevent.a and liblk.a rules have always-missed dependencies,
which causes python.so to be relinked at every build attempt - even
if none of the affected code changes.
This slows down re-builds unnecessarily, by adding more than a second
to the build time:
comet:~/tip/tools/perf> time make
...
SUBDIR /fast/mingo/tip/tools/lib/lk/
make[1]: `liblk.a' is up to date.
SUBDIR /fast/mingo/tip/tools/lib/traceevent/
LINK perf
GEN python/perf.so
real 0m1.701s
user 0m1.338s
sys 0m0.301s
Add the (trivial) dependencies to not force a re-link.
This speeds up an empty re-build enormously:
comet:~/tip/tools/perf> time make
...
real 0m0.207s
user 0m0.134s
sys 0m0.028s
[ This adds some coupling between the build dependencies of
libtraceevent and liblk - but as long as those stay relatively
simple this should not be an issue. ]
Signed-off-by: Ingo Molnar <[email protected]>
---
tools/perf/Makefile | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
Index: tip/tools/perf/Makefile
===================================================================
--- tip.orig/tools/perf/Makefile
+++ tip/tools/perf/Makefile
@@ -669,15 +669,19 @@ $(LIB_FILE): $(LIB_OBJS)
$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS)
# libtraceevent.a
-$(LIBTRACEEVENT):
+TE_SOURCES = $(wildcard $(TRACE_EVENT_DIR)*.[ch])
+
+$(LIBTRACEEVENT): $(TE_SOURCES)
$(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) libtraceevent.a
$(LIBTRACEEVENT)-clean:
$(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) clean
+LIBLK_SOURCES = $(wildcard $(LK_PATH)*.[ch])
+
# if subdir is set, we've been called from above so target has been built
# already
-$(LIBLK):
+$(LIBLK): $(LIBLK_SOURCES)
ifeq ($(subdir),)
$(QUIET_SUBDIR0)$(LK_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) liblk.a
endif
@@ -824,7 +828,7 @@ else
GIT-HEAD-PHONY =
endif
-.PHONY: all install clean strip $(LIBTRACEEVENT) $(LIBLK)
+.PHONY: all install clean strip
.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope .FORCE-PERF-CFLAGS
(Sent with proper subject line.)
=================>
libtraceevent.a and liblk.a rules have always-missed dependencies,
which causes python.so to be relinked at every build attempt - even
if none of the affected code changes.
This slows down re-builds unnecessarily, by adding more than a second
to the build time:
comet:~/tip/tools/perf> time make
...
SUBDIR /fast/mingo/tip/tools/lib/lk/
make[1]: `liblk.a' is up to date.
SUBDIR /fast/mingo/tip/tools/lib/traceevent/
LINK perf
GEN python/perf.so
real 0m1.701s
user 0m1.338s
sys 0m0.301s
Add the (trivial) dependencies to not force a re-link.
This speeds up an empty re-build enormously:
comet:~/tip/tools/perf> time make
...
real 0m0.207s
user 0m0.134s
sys 0m0.028s
[ This adds some coupling between the build dependencies of
libtraceevent and liblk - but as long as those stay relatively
simple this should not be an issue. ]
Signed-off-by: Ingo Molnar <[email protected]>
---
tools/perf/Makefile | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
Index: tip/tools/perf/Makefile
===================================================================
--- tip.orig/tools/perf/Makefile
+++ tip/tools/perf/Makefile
@@ -669,15 +669,19 @@ $(LIB_FILE): $(LIB_OBJS)
$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS)
# libtraceevent.a
-$(LIBTRACEEVENT):
+TE_SOURCES = $(wildcard $(TRACE_EVENT_DIR)*.[ch])
+
+$(LIBTRACEEVENT): $(TE_SOURCES)
$(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) libtraceevent.a
$(LIBTRACEEVENT)-clean:
$(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) clean
+LIBLK_SOURCES = $(wildcard $(LK_PATH)*.[ch])
+
# if subdir is set, we've been called from above so target has been built
# already
-$(LIBLK):
+$(LIBLK): $(LIBLK_SOURCES)
ifeq ($(subdir),)
$(QUIET_SUBDIR0)$(LK_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) liblk.a
endif
@@ -824,7 +828,7 @@ else
GIT-HEAD-PHONY =
endif
-.PHONY: all install clean strip $(LIBTRACEEVENT) $(LIBLK)
+.PHONY: all install clean strip
.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope .FORCE-PERF-CFLAGS
Hi Ingo,
On Mon, 30 Sep 2013 18:42:10 +0200, Ingo Molnar wrote:
> This series (with combo patch attached) implements (much) faster
> perf-tools feature-auto-detection.
>
> I used 3 tricks to implement feature auto-dependencies and to speed up
> feature detection:
>
> - standalone Makefile in config/feature-checks/ built in parallel
>
> - split-out standalone .c files in config/feature-checks/*.c
>
> - used GCC's auto-dependency generation feature (-MD) to track the
> effects of system library addition/removal.
I recall that this could lead to a nasty build failure. Please
see commit b6f4f804108b ("tools lib traceevent: Do not generate
dependency for system header files").
The problem is that it turned out to depend on some compiler headers
which are located under a directory with a version number in its path.
If so, when the compiler is upgraded to a new version, the original
dependencies can no longer be found, so the build fails.
$ cat config/feature-checks/test-libelf.d
test-libelf: test-libelf.c /usr/include/libelf.h /usr/include/sys/types.h \
/usr/include/features.h /usr/include/stdc-predef.h \
/usr/include/sys/cdefs.h /usr/include/bits/wordsize.h \
/usr/include/gnu/stubs.h /usr/include/gnu/stubs-64.h \
/usr/include/bits/types.h /usr/include/bits/typesizes.h \
/usr/include/time.h \
/usr/lib/gcc/x86_64-redhat-linux/4.7.2/include/stddef.h \
/usr/include/endian.h /usr/include/bits/endian.h \
/usr/include/bits/byteswap.h /usr/include/bits/byteswap-16.h \
/usr/include/sys/select.h /usr/include/bits/select.h \
/usr/include/bits/sigset.h /usr/include/bits/time.h \
/usr/include/sys/sysmacros.h /usr/include/bits/pthreadtypes.h \
/usr/include/elf.h \
/usr/lib/gcc/x86_64-redhat-linux/4.7.2/include/stdint.h \
/usr/include/stdint.h /usr/include/bits/wchar.h
In this case we are using this for feature-checking, so I guess it'd
fail to check the feature after upgrade.
Thanks,
Namhyung
* Namhyung Kim <[email protected]> wrote:
> Hi Ingo,
>
> On Mon, 30 Sep 2013 18:42:10 +0200, Ingo Molnar wrote:
> > This series (with combo patch attached) implements (much) faster
> > perf-tools feature-auto-detection.
> >
> > I used 3 tricks to implement feature auto-dependencies and to speed up
> > feature detection:
> >
> > - standalone Makefile in config/feature-checks/ built in parallel
> >
> > - split-out standalone .c files in config/feature-checks/*.c
> >
> > - used GCC's auto-dependency generation feature (-MD) to track the
> > effects of system library addition/removal.
>
> I have a memory that this could lead to a nasty build failure. Please
> see the commit b6f4f804108b ("tools lib traceevent: Do not generate
> dependency for system header files").
I think that at least the 'make clean' failure was just a buggy Makefile.
To quote the build error from the commit:
comet:~/tip/tools/lib/traceevent> make clean
make: *** No rule to make target `/usr/lib/gcc/x86_64-redhat-linux/4.7.0/include/stddef.h', needed by `.trace-seq
It suggests that the 'clean' target depended on .d dependency files -
that's a fundamentally incorrect use of -M/-MD auto-dependencies.
> The problem is that it turned out to depend on some compiler headers
> which are located under some directory with a version number. If so,
> when compiler upgraded to a new version, it cannot find the original
> dependencies so fail to build.
>
> $ cat config/feature-checks/test-libelf.d
> test-libelf: test-libelf.c /usr/include/libelf.h /usr/include/sys/types.h \
> /usr/include/features.h /usr/include/stdc-predef.h \
> /usr/include/sys/cdefs.h /usr/include/bits/wordsize.h \
> /usr/include/gnu/stubs.h /usr/include/gnu/stubs-64.h \
> /usr/include/bits/types.h /usr/include/bits/typesizes.h \
> /usr/include/time.h \
> /usr/lib/gcc/x86_64-redhat-linux/4.7.2/include/stddef.h \
> /usr/include/endian.h /usr/include/bits/endian.h \
> /usr/include/bits/byteswap.h /usr/include/bits/byteswap-16.h \
> /usr/include/sys/select.h /usr/include/bits/select.h \
> /usr/include/bits/sigset.h /usr/include/bits/time.h \
> /usr/include/sys/sysmacros.h /usr/include/bits/pthreadtypes.h \
> /usr/include/elf.h \
> /usr/lib/gcc/x86_64-redhat-linux/4.7.2/include/stdint.h \
> /usr/include/stdint.h /usr/include/bits/wchar.h
>
> In this case we are using this for feature-checking, so I guess it'd
> fail to check the feature after upgrade.
The dependencies are re-made by GCC if a target fails and is rebuilt - and
that should include the new header locations.
I checked out the parent commit (8f7c1d07ade5) which still had full -M,
and this is how it utilized dependencies:
# let .d file also depends on the source and header files
define check_deps
@set -e; $(RM) $@; \
$(CC) -M $(CFLAGS) $< > $@.$$$$; \
sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \
$(RM) $@.$$$$
endef
that's not a very robust method either: .d files should be generated via
-MD, not via -M, and should be included directly into the Makefile, as I
did in my patch:
-include *.d */*.d
and the .d files themselves are never added as dependencies - they are
re-made by compilation automatically, not by any explicit Makefile rule.
Adding them as dependencies risks circular dependencies, because the only
method to rebuild a .d file is to actually meet the dependencies of a .c
target.
So if done properly I don't think the build failure cited in that
changelog can trigger.
Now, I cannot vouch for -MD blindly, without having seen a lot more
testing, so we might still be forced to disable or limit that auto-dep
trick, but the reasons cited in b6f4f804108b don't seem to be a GCC bug
but a Makefile bug - they just weren't fully understood back then.
Thanks,
Ingo
On Tue, 1 Oct 2013 16:46:18 +0200, Ingo Molnar wrote:
[SNIP]
>
> -$(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
> +$(OUTPUT)PERF-VERSION-FILE: ../../.git/HEAD
> @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT)
> + @touch $(OUTPUT)PERF-VERSION-FILE
>
> CC = $(CROSS_COMPILE)gcc
> AR = $(CROSS_COMPILE)ar
> @@ -813,7 +814,17 @@ clean: $(LIBTRACEEVENT)-clean $(LIBLK)-c
> $(RM) $(OUTPUT)util/*-flex*
> $(python-clean)
>
> +#
> +# Trick: if ../../.git does not exist - we are building out of tree for example,
> +# then force version regeneration:
> +#
> +ifeq ($(wildcard ../../.git/HEAD),)
> + GIT-HEAD-PHONY = ../../.git/HEAD
> +else
> + GIT-HEAD-PHONY =
> +endif
> +
> .PHONY: all install clean strip $(LIBTRACEEVENT) $(LIBLK)
> .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
> -.PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS
> +.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope .FORCE-PERF-CFLAGS
>
I found an unrelated bug. When I tried to build perf out of tree after
'make perf-targz-src-pkg', it could not get any version number.
GEN common-cmds.h
* new build flags or prefix
make[1]: *** No rule to make target `kernelversion'. Stop.
PERF_VERSION =
...
AR libperf.a
LINK perf
$ ./perf --version
perf version
It looks like we should save and pass the result of 'make kernelversion'
to the tarball.
Thanks,
Namhyung
* Namhyung Kim <[email protected]> wrote:
> On Tue, 1 Oct 2013 16:46:18 +0200, Ingo Molnar wrote:
>
> [SNIP]
> >
> > -$(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
> > +$(OUTPUT)PERF-VERSION-FILE: ../../.git/HEAD
> > @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT)
> > + @touch $(OUTPUT)PERF-VERSION-FILE
> >
> > CC = $(CROSS_COMPILE)gcc
> > AR = $(CROSS_COMPILE)ar
> > @@ -813,7 +814,17 @@ clean: $(LIBTRACEEVENT)-clean $(LIBLK)-c
> > $(RM) $(OUTPUT)util/*-flex*
> > $(python-clean)
> >
> > +#
> > +# Trick: if ../../.git does not exist - we are building out of tree for example,
> > +# then force version regeneration:
> > +#
> > +ifeq ($(wildcard ../../.git/HEAD),)
> > + GIT-HEAD-PHONY = ../../.git/HEAD
> > +else
> > + GIT-HEAD-PHONY =
> > +endif
> > +
> > .PHONY: all install clean strip $(LIBTRACEEVENT) $(LIBLK)
> > .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
> > -.PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS
> > +.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope .FORCE-PERF-CFLAGS
> >
>
> I found an unrelated bug. When I tried to build perf out of tree after
> 'make perf-targz-src-pkg' it cannot get any version number.
>
> GEN common-cmds.h
> * new build flags or prefix
> make[1]: *** No rule to make target `kernelversion'. Stop.
> PERF_VERSION =
> ...
> AR libperf.a
> LINK perf
>
> $ ./perf --version
> perf version
Is this unrelated to my patches?
> It looks like we should save and pass the result of 'make kernelversion'
> to the tarball.
Or we could simply generate PERF-VERSION-FILE and save it into the
tarball?
Then it will be the most accurate version we can determine at the point
when the tarball is generated.
Thanks,
Ingo
* Linus Torvalds <[email protected]> wrote:
> [...]
>
> Another annoyance during that make was that "make install" seems to want
> to re-make the thing I just built. That's absolutely horrible, even if
> I've seen too many broken projects do that. Now, for perf it's not as
> horrible as for some (because you can do "make install" as a normal
> user), but it's still a pattern that needs to be called out and needs to
> die. It's not just that it slows down "make install", it's also that a
> normal pattern *should* be that you build things as a normal user, and
> do "make install" as root.
Ok, I think I managed to fix this bug too - see the patch below. Only
lightly tested, with the most common usecases.
Thanks,
Ingo
=========================>
Subject: tools/perf: Fix double/triple-build of the feature detection logic during 'make install' et al
From: Ingo Molnar <[email protected]>
Date: Wed Oct 2 06:51:27 CEST 2013
Linus reported the following perf build system bug:
'Another annoyance during that make was that "make install" seems to
want to re-make the thing I just built. That's absolutely horrible, [...]'
The targets that got re-built were 'only' the (numerous) feature checks,
not the whole project - but still it was mighty annoying as the feature
checks took 9+ seconds even on reasonably fast boxes...
Even with the autodep patches, where feature detection is much faster,
double/triple checks waste resources, waste screen real estate and confuse
users.
There were two sources for these unnecessary re-builds of the feature
checks:
- Unnecessary nested invocations of $(MAKE), apparently to be able
to do conditional compilation dependent on documentation tools
presence. Use straight dependencies instead, with no nesting.
- A direct invocation of $(MAKE) to rebuild the PERF-VERSION-FILE.
This is apparently done to be able to include it into the
Makefile:
-include $(OUTPUT)PERF-VERSION-FILE
but that's entirely pointless for two reasons: 1) the version file
gets regenerated by the initial build pass anyway, 2) including it
is futile, given its contents:
#define PERF_VERSION "3.12.rc3.g8510c7"
'make' will interpret that as a comment line...
So just remove this part of the doc-generation logic.
With these things fixed a 'make install' now rebuilds only what is needed.
A repeated 'make install' on an already built tree is super fast now, it
finishes in under 0.3 seconds:
#
# After the patch:
#
$ time make install
...
real 0m0.280s
user 0m0.162s
sys 0m0.054s
Prior to all the autodep changes and prior to this fix, a repeat 'make install'
took 24.1 seconds (!) on the same system:
#
# Before the patches:
#
$ time make install
...
real 0m24.109s
user 0m21.171s
sys 0m2.449s
Which almost entirely was caused by fixable build system fat.
We are now literally ~86 times faster.
A fresh rebuild and install now takes just 11.4 seconds:
#
# After the patch:
#
$ make clean
$ time make -j16 install
...
real 0m11.457s
user 1m43.411s
sys 0m7.610s
Without the patches it took 27.8 seconds:
#
# Before the patches:
#
$ make clean
$ time make -j16 install
...
real 0m27.801s
user 1m59.242s
sys 0m9.749s
So even in the complete rebuild case we are now ~2.5 times faster.
Reported-by: Linus Torvalds <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: David Ahern <[email protected]>
Cc: Jiri Olsa <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
tools/perf/Documentation/Makefile | 17 ++++-------------
1 file changed, 4 insertions(+), 13 deletions(-)
Index: tip/tools/perf/Documentation/Makefile
===================================================================
--- tip.orig/tools/perf/Documentation/Makefile
+++ tip/tools/perf/Documentation/Makefile
@@ -192,13 +192,14 @@ do-install-man: man
install-man: check-man-tools man
-try-install-man:
ifdef missing_tools
- $(warning Please install $(missing_tools) to have the man pages installed)
+ DO_INSTALL_MAN = $(warning Please install $(missing_tools) to have the man pages installed)
else
- $(MAKE) do-install-man
+ DO_INSTALL_MAN = do-install-man
endif
+try-install-man: $(DO_INSTALL_MAN)
+
install-info: info
$(INSTALL) -d -m 755 $(DESTDIR)$(infodir)
$(INSTALL) -m 644 $(OUTPUT)perf.info $(OUTPUT)perfman.info $(DESTDIR)$(infodir)
@@ -216,14 +217,6 @@ install-pdf: pdf
#install-html: html
# '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir)
-ifneq ($(MAKECMDGOALS),clean)
-ifneq ($(MAKECMDGOALS),tags)
-$(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
- $(QUIET_SUBDIR0)../ $(QUIET_SUBDIR1) $(OUTPUT)PERF-VERSION-FILE
-
--include $(OUTPUT)PERF-VERSION-FILE
-endif
-endif
#
# Determine "include::" file references in asciidoc files.
@@ -342,5 +335,3 @@ $(patsubst %.txt,%.html,$(wildcard howto
#quick-install-html:
# '$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(HTML_REF) $(DESTDIR)$(htmldir)
-
-.PHONY: .FORCE-PERF-VERSION-FILE
On Wed, 2 Oct 2013 08:50:42 +0200, Ingo Molnar wrote:
> * Namhyung Kim <[email protected]> wrote:
>
>> On Tue, 1 Oct 2013 16:46:18 +0200, Ingo Molnar wrote:
>>
>> [SNIP]
>> >
>> > -$(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
>> > +$(OUTPUT)PERF-VERSION-FILE: ../../.git/HEAD
>> > @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT)
>> > + @touch $(OUTPUT)PERF-VERSION-FILE
>> >
>> > CC = $(CROSS_COMPILE)gcc
>> > AR = $(CROSS_COMPILE)ar
>> > @@ -813,7 +814,17 @@ clean: $(LIBTRACEEVENT)-clean $(LIBLK)-c
>> > $(RM) $(OUTPUT)util/*-flex*
>> > $(python-clean)
>> >
>> > +#
>> > +# Trick: if ../../.git does not exist - we are building out of tree for example,
>> > +# then force version regeneration:
>> > +#
>> > +ifeq ($(wildcard ../../.git/HEAD),)
>> > + GIT-HEAD-PHONY = ../../.git/HEAD
>> > +else
>> > + GIT-HEAD-PHONY =
>> > +endif
>> > +
>> > .PHONY: all install clean strip $(LIBTRACEEVENT) $(LIBLK)
>> > .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
>> > -.PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS
>> > +.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope .FORCE-PERF-CFLAGS
>> >
>>
>> I found an unrelated bug. When I tried to build perf out of tree after
>> 'make perf-targz-src-pkg' it cannot get any version number.
>>
>> GEN common-cmds.h
>> * new build flags or prefix
>> make[1]: *** No rule to make target `kernelversion'. Stop.
>> PERF_VERSION =
>> ...
>> AR libperf.a
>> LINK perf
>>
>> $ ./perf --version
>> perf version
>
> Is this unrelated to my patches?
Yes, I can reproduce it without your patch.
>
>> It looks like we should save and pass the result of 'make kernelversion'
>> to the tarball.
>
> Or we could simply generate PERF-VERSION-FILE and save it into the
> tarball?
>
> Then it will be the most accurate version we can determine at the point
> when the tarball is generated.
Hmm.. I tried to do it, but it's not as simple as I thought. With
your patch, the PERF-VERSION-FILE is regenerated every time if it can't
find .git/HEAD, so saving the file didn't help since it'll be
overwritten. Anyway, it also cannot be used with a 'make O=...' build.
Thanks,
Namhyung
On Mon, Sep 30, 2013 at 06:42:10PM +0200, Ingo Molnar wrote:
>
> This series (with combo patch attached) implements (much) faster
> perf-tools feature-auto-detection.
>
> I used 3 tricks to implement feature auto-dependencies and to speed up
> feature detection:
>
> - standalone Makefile in config/feature-checks/ built in parallel
>
> - split-out standalone .c files in config/feature-checks/*.c
>
> - used GCC's auto-dependency generation feature (-MD) to track the
> effects of system library addition/removal.
>
> Before the changes a fully cached re-build of an already built tree took
> 2.6 second:
>
> $ perf stat --null --sync --repeat 3 -a make Makefile
>
> 2.669467209 seconds time elapsed ( +- 0.07% )
>
> After the changes it takes only 0.6 seconds:
>
> 0.599161560 seconds time elapsed ( +- 0.25% )
nice!
>
> So the empty build got 4.4x faster.
>
SNIP
> ifdef NO_DEMANGLE
> CFLAGS += -DNO_DEMANGLE
> else
> - ifdef HAVE_CPLUS_DEMANGLE
> + ifdef HAVE_CPLUS_DEMANGLE_SUPPORT
> EXTLIBS += -liberty
> - CFLAGS += -DHAVE_CPLUS_DEMANGLE
> + CFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT
> else
> FLAGS_BFD=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) -DPACKAGE='perf' -lbfd
> - has_bfd := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD),libbfd)
> - ifeq ($(has_bfd),y)
> + ifeq ($(feature-libbfd), 1)
> EXTLIBS += -lbfd
> else
> FLAGS_BFD_IBERTY=$(FLAGS_BFD) -liberty
> @@ -388,7 +410,7 @@ else
> has_cplus_demangle := $(call try-cc,$(SOURCE_CPLUS_DEMANGLE),$(FLAGS_CPLUS_DEMANGLE),demangle)
> ifeq ($(has_cplus_demangle),y)
> EXTLIBS += -liberty
> - CFLAGS += -DHAVE_CPLUS_DEMANGLE
> + CFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT
> else
> msg := $(warning No bfd.h/libbfd found, install binutils-dev[el]/zlib-static to gain symbol demangling)
> CFLAGS += -DNO_DEMANGLE
> @@ -400,30 +422,29 @@ else
> endif
Why not add tests for has_bfd_iberty/has_bfd_iberty_z/has_cplus_demangle?
We could then remove config/feature-tests.mak completely
and do other cleanups, like removing try-cc.
jirka
This patch below fixes another tools/perf build system annoyance: that it
does not build in parallel by default.
The effect is that for example 'make install' will build in parallel
(dependent on number of CPUs in the system) and then install the result.
Thanks,
Ingo
=======================>
Subject: tools/perf/build: Automatically build in parallel, based on number of CPUs in the system
From: Ingo Molnar <[email protected]>
Date: Wed Oct 2 11:18:28 CEST 2013
Implement automatic parallel builds when building in tools/perf:
$ time make
# [ perf build: Doing 'make -j12' parallel build. ]
Auto-detecting system features:
...
real 0m9.265s
user 0m59.888s
sys 0m6.082s
With GNU make, achieving this is not particularly easy: it requires a separate
makefile, which then invokes the main Makefile.
( Note: this patch adds Makefile.parallel to show the concept - the two
makefiles will be flipped in the next patch to avoid having to specify -f
to get parallelism in the default build. )
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: David Ahern <[email protected]>
Cc: Jiri Olsa <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
tools/perf/Makefile.parallel | 26 ++++++++++++++++++++++++++
1 file changed, 26 insertions(+)
Index: tip/tools/perf/Makefile.parallel
===================================================================
--- /dev/null
+++ tip/tools/perf/Makefile.parallel
@@ -0,0 +1,26 @@
+#
+# Do a parallel build with multiple jobs, based on the number of CPUs online
+# in this system: 'make -j8' on a 8-CPU system, etc.
+#
+# (To override it, run 'make JOBS=1' and similar.)
+#
+ifeq ($(JOBS),)
+ JOBS := $(shell grep -c ^processor /proc/cpuinfo 2>/dev/null)
+ ifeq ($(JOBS),)
+ JOBS := 1
+ endif
+endif
+
+export JOBS
+
+$(info $(shell printf '# [ perf build: Doing '\''make \033[33m-j'$(JOBS)'\033[m'\'' parallel build. ]\n'))
+
+#
+# Needed if no target specified:
+#
+all:
+ @$(MAKE) --no-print-directory -j$(JOBS) $@
+
+%:
+ @$(MAKE) --no-print-directory -j$(JOBS) $@
+
* Jiri Olsa <[email protected]> wrote:
> > @@ -388,7 +410,7 @@ else
> > has_cplus_demangle := $(call try-cc,$(SOURCE_CPLUS_DEMANGLE),$(FLAGS_CPLUS_DEMANGLE),demangle)
> > ifeq ($(has_cplus_demangle),y)
> > EXTLIBS += -liberty
> > - CFLAGS += -DHAVE_CPLUS_DEMANGLE
> > + CFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT
> > else
> > msg := $(warning No bfd.h/libbfd found, install binutils-dev[el]/zlib-static to gain symbol demangling)
> > CFLAGS += -DNO_DEMANGLE
> > @@ -400,30 +422,29 @@ else
> > endif
>
> Why not add tests for
> has_bfd_iberty/has_bfd_iberty_z/has_cplus_demangle?
Yeah, did not get to that stage yet: I only converted the feature tests
that trigger on my system (which is about 80% of them). Will complete the
transition.
> We could remove the config/feature-tests.mak completely and other
> cleanup like remove try-cc.
Yeah - that will be a nice simplification!
Thanks,
Ingo