Hi Ingo,
Please consider pulling from:
git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux-2.6 perf/core
Regards,
- Arnaldo
Arnaldo Carvalho de Melo (4):
perf events: Separate the routines handling the PERF_SAMPLE_ identity fields
perf events: Make sample_type identity fields available in all PERF_RECORD_ events
perf session: Parse sample earlier
perf tools: Ask for ID PERF_SAMPLE_ info on all PERF_RECORD_ events
Thomas Gleixner (1):
perf events: Fix event inherit fallout of precalculated headers
include/linux/perf_event.h | 13 +-
kernel/perf_event.c | 234 +++++++++++++++------
tools/perf/Documentation/perf-record.txt | 5 +
tools/perf/builtin-annotate.c | 6 +-
tools/perf/builtin-diff.c | 11 +-
tools/perf/builtin-inject.c | 39 ++--
tools/perf/builtin-kmem.c | 21 +--
tools/perf/builtin-lock.c | 12 +-
tools/perf/builtin-record.c | 23 ++
tools/perf/builtin-report.c | 15 +-
tools/perf/builtin-sched.c | 21 +--
tools/perf/builtin-script.c | 34 +--
tools/perf/builtin-timechart.c | 40 ++--
tools/perf/builtin-top.c | 12 +-
tools/perf/util/build-id.c | 7 +-
tools/perf/util/event.c | 341 +++++++++++++++++++----------
tools/perf/util/event.h | 27 ++-
tools/perf/util/header.c | 29 ++-
tools/perf/util/header.h | 1 +
tools/perf/util/hist.h | 2 +
tools/perf/util/session.c | 159 ++++++++++++---
tools/perf/util/session.h | 12 +-
22 files changed, 717 insertions(+), 347 deletions(-)
From: Arnaldo Carvalho de Melo <[email protected]>
Those will be made available in sample-like events such as MMAP, EXEC, etc in a
followup patch. So precalculate the extra id header space and have a separate
routine to fill them up.
V2: Thomas noticed that the id header needs to be precalculated at
inherit_events too:
LKML-Reference: <[email protected]>
Tested-by: Thomas Gleixner <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Acked-by: Ian Munsie <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
Acked-by: Thomas Gleixner <[email protected]>
Cc: Frédéric Weisbecker <[email protected]>
Cc: Ian Munsie <[email protected]>
Cc: Mike Galbraith <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Stephane Eranian <[email protected]>
Cc: Thomas Gleixner <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
include/linux/perf_event.h | 1 +
kernel/perf_event.c | 129 +++++++++++++++++++++++++------------------
2 files changed, 76 insertions(+), 54 deletions(-)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index adf6d99..b9950b1 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -759,6 +759,7 @@ struct perf_event {
struct perf_event_attr attr;
u16 header_size;
+ u16 id_header_size;
u16 read_size;
struct hw_perf_event hw;
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 7961b27..a047997 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -133,6 +133,28 @@ static void unclone_ctx(struct perf_event_context *ctx)
}
}
+static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
+{
+ /*
+ * only top level events have the pid namespace they were created in
+ */
+ if (event->parent)
+ event = event->parent;
+
+ return task_tgid_nr_ns(p, event->ns);
+}
+
+static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
+{
+ /*
+ * only top level events have the pid namespace they were created in
+ */
+ if (event->parent)
+ event = event->parent;
+
+ return task_pid_nr_ns(p, event->ns);
+}
+
/*
* If we inherit events we want to return the parent event id
* to userspace.
@@ -351,15 +373,30 @@ static void perf_event__header_size(struct perf_event *event)
if (sample_type & PERF_SAMPLE_IP)
size += sizeof(data->ip);
+ if (sample_type & PERF_SAMPLE_ADDR)
+ size += sizeof(data->addr);
+
+ if (sample_type & PERF_SAMPLE_PERIOD)
+ size += sizeof(data->period);
+
+ if (sample_type & PERF_SAMPLE_READ)
+ size += event->read_size;
+
+ event->header_size = size;
+}
+
+static void perf_event__id_header_size(struct perf_event *event)
+{
+ struct perf_sample_data *data;
+ u64 sample_type = event->attr.sample_type;
+ u16 size = 0;
+
if (sample_type & PERF_SAMPLE_TID)
size += sizeof(data->tid_entry);
if (sample_type & PERF_SAMPLE_TIME)
size += sizeof(data->time);
- if (sample_type & PERF_SAMPLE_ADDR)
- size += sizeof(data->addr);
-
if (sample_type & PERF_SAMPLE_ID)
size += sizeof(data->id);
@@ -369,13 +406,7 @@ static void perf_event__header_size(struct perf_event *event)
if (sample_type & PERF_SAMPLE_CPU)
size += sizeof(data->cpu_entry);
- if (sample_type & PERF_SAMPLE_PERIOD)
- size += sizeof(data->period);
-
- if (sample_type & PERF_SAMPLE_READ)
- size += event->read_size;
-
- event->header_size = size;
+ event->id_header_size = size;
}
static void perf_group_attach(struct perf_event *event)
@@ -3357,6 +3388,36 @@ __always_inline void perf_output_copy(struct perf_output_handle *handle,
} while (len);
}
+static void perf_event_header__init_id(struct perf_event_header *header,
+ struct perf_sample_data *data,
+ struct perf_event *event)
+{
+ u64 sample_type = event->attr.sample_type;
+
+ data->type = sample_type;
+ header->size += event->id_header_size;
+
+ if (sample_type & PERF_SAMPLE_TID) {
+ /* namespace issues */
+ data->tid_entry.pid = perf_event_pid(event, current);
+ data->tid_entry.tid = perf_event_tid(event, current);
+ }
+
+ if (sample_type & PERF_SAMPLE_TIME)
+ data->time = perf_clock();
+
+ if (sample_type & PERF_SAMPLE_ID)
+ data->id = primary_event_id(event);
+
+ if (sample_type & PERF_SAMPLE_STREAM_ID)
+ data->stream_id = event->id;
+
+ if (sample_type & PERF_SAMPLE_CPU) {
+ data->cpu_entry.cpu = raw_smp_processor_id();
+ data->cpu_entry.reserved = 0;
+ }
+}
+
int perf_output_begin(struct perf_output_handle *handle,
struct perf_event *event, unsigned int size,
int nmi, int sample)
@@ -3459,28 +3520,6 @@ void perf_output_end(struct perf_output_handle *handle)
rcu_read_unlock();
}
-static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
-{
- /*
- * only top level events have the pid namespace they were created in
- */
- if (event->parent)
- event = event->parent;
-
- return task_tgid_nr_ns(p, event->ns);
-}
-
-static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
-{
- /*
- * only top level events have the pid namespace they were created in
- */
- if (event->parent)
- event = event->parent;
-
- return task_pid_nr_ns(p, event->ns);
-}
-
static void perf_output_read_one(struct perf_output_handle *handle,
struct perf_event *event,
u64 enabled, u64 running)
@@ -3655,37 +3694,17 @@ void perf_prepare_sample(struct perf_event_header *header,
{
u64 sample_type = event->attr.sample_type;
- data->type = sample_type;
-
header->type = PERF_RECORD_SAMPLE;
header->size = sizeof(*header) + event->header_size;
header->misc = 0;
header->misc |= perf_misc_flags(regs);
+ perf_event_header__init_id(header, data, event);
+
if (sample_type & PERF_SAMPLE_IP)
data->ip = perf_instruction_pointer(regs);
- if (sample_type & PERF_SAMPLE_TID) {
- /* namespace issues */
- data->tid_entry.pid = perf_event_pid(event, current);
- data->tid_entry.tid = perf_event_tid(event, current);
- }
-
- if (sample_type & PERF_SAMPLE_TIME)
- data->time = perf_clock();
-
- if (sample_type & PERF_SAMPLE_ID)
- data->id = primary_event_id(event);
-
- if (sample_type & PERF_SAMPLE_STREAM_ID)
- data->stream_id = event->id;
-
- if (sample_type & PERF_SAMPLE_CPU) {
- data->cpu_entry.cpu = raw_smp_processor_id();
- data->cpu_entry.reserved = 0;
- }
-
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
int size = 1;
@@ -5745,6 +5764,7 @@ SYSCALL_DEFINE5(perf_event_open,
* Precalculate sample_data sizes
*/
perf_event__header_size(event);
+ perf_event__id_header_size(event);
/*
* Drop the reference on the group_event after placing the
@@ -6102,6 +6122,7 @@ inherit_event(struct perf_event *parent_event,
* Precalculate sample_data sizes
*/
perf_event__header_size(child_event);
+ perf_event__id_header_size(child_event);
/*
* Link it up in the child's context:
--
1.6.2.5
From: Arnaldo Carvalho de Melo <[email protected]>
So that we can use -T == --timestamp, asking for PERF_SAMPLE_TIME:
$ perf record -aT
$ perf report -D | grep PERF_RECORD_
<SNIP>
3 5951915425 0x47530 [0x58]: PERF_RECORD_SAMPLE(IP, 1): 16811/16811: 0xffffffff8138c1a2 period: 215979 cpu:3
3 5952026879 0x47588 [0x90]: PERF_RECORD_SAMPLE(IP, 1): 16811/16811: 0xffffffff810cb480 period: 215979 cpu:3
3 5952059959 0x47618 [0x38]: PERF_RECORD_FORK(6853:6853):(16811:16811)
3 5952138878 0x47650 [0x78]: PERF_RECORD_SAMPLE(IP, 1): 16811/16811: 0xffffffff811bac35 period: 431478 cpu:3
3 5952375068 0x476c8 [0x30]: PERF_RECORD_COMM: find:6853
3 5952395923 0x476f8 [0x50]: PERF_RECORD_MMAP 6853/6853: [0x400000(0x25000) @ 0]: /usr/bin/find
3 5952413756 0x47748 [0xa0]: PERF_RECORD_SAMPLE(IP, 1): 6853/6853: 0xffffffff810d080f period: 859332 cpu:3
3 5952419837 0x477e8 [0x58]: PERF_RECORD_MMAP 6853/6853: [0x3f44600000(0x21d000) @ 0]: /lib64/ld-2.5.so
3 5952437929 0x47840 [0x48]: PERF_RECORD_MMAP 6853/6853: [0x7fff7e1c9000(0x1000) @ 0x7fff7e1c9000]: [vdso]
3 5952570127 0x47888 [0x58]: PERF_RECORD_MMAP 6853/6853: [0x3f46200000(0x218000) @ 0]: /lib64/libselinux.so.1
3 5952623637 0x478e0 [0x58]: PERF_RECORD_MMAP 6853/6853: [0x3f44a00000(0x356000) @ 0]: /lib64/libc-2.5.so
3 5952675720 0x47938 [0x58]: PERF_RECORD_MMAP 6853/6853: [0x3f44e00000(0x204000) @ 0]: /lib64/libdl-2.5.so
3 5952710080 0x47990 [0x58]: PERF_RECORD_MMAP 6853/6853: [0x3f45a00000(0x246000) @ 0]: /lib64/libsepol.so.1
3 5952847802 0x479e8 [0x58]: PERF_RECORD_SAMPLE(IP, 1): 6853/6853: 0xffffffff813897f0 period: 1142536 cpu:3
<SNIP>
First column is the cpu and the second the timestamp.
That way we can investigate problems in the event stream.
If the new perf binary is run on an older kernel, it will disable this feature
automatically.
Tested-by: Thomas Gleixner <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Acked-by: Ian Munsie <[email protected]>
Acked-by: Thomas Gleixner <[email protected]>
Cc: Frédéric Weisbecker <[email protected]>
Cc: Ian Munsie <[email protected]>
Cc: Mike Galbraith <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Stephane Eranian <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/Documentation/perf-record.txt | 5 +
tools/perf/builtin-record.c | 16 ++
tools/perf/builtin-top.c | 14 +-
tools/perf/util/event.c | 285 +++++++++++++++++++++---------
tools/perf/util/event.h | 7 +-
tools/perf/util/header.c | 18 ++
tools/perf/util/header.h | 1 +
tools/perf/util/session.c | 66 +++++++-
tools/perf/util/session.h | 3 +
9 files changed, 315 insertions(+), 100 deletions(-)
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 0ad1bc7..52462ae 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -108,6 +108,11 @@ OPTIONS
--data::
Sample addresses.
+-T::
+--timestamp::
+ Sample timestamps. Use it with 'perf report -D' to see the timestamps,
+ for instance.
+
-n::
--no-samples::
Don't sample.
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index b34de92..699dd21 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -49,6 +49,7 @@ static const char *output_name = "perf.data";
static int group = 0;
static int realtime_prio = 0;
static bool raw_samples = false;
+static bool sample_id_all_avail = true;
static bool system_wide = false;
static pid_t target_pid = -1;
static pid_t target_tid = -1;
@@ -61,6 +62,7 @@ static bool call_graph = false;
static bool inherit_stat = false;
static bool no_samples = false;
static bool sample_address = false;
+static bool sample_time = false;
static bool no_buildid = false;
static bool no_buildid_cache = false;
@@ -283,6 +285,9 @@ static void create_counter(int counter, int cpu)
if (system_wide)
attr->sample_type |= PERF_SAMPLE_CPU;
+ if (sample_time)
+ attr->sample_type |= PERF_SAMPLE_TIME;
+
if (raw_samples) {
attr->sample_type |= PERF_SAMPLE_TIME;
attr->sample_type |= PERF_SAMPLE_RAW;
@@ -299,6 +304,8 @@ static void create_counter(int counter, int cpu)
attr->disabled = 1;
attr->enable_on_exec = 1;
}
+retry_sample_id:
+ attr->sample_id_all = sample_id_all_avail ? 1 : 0;
for (thread_index = 0; thread_index < thread_num; thread_index++) {
try_again:
@@ -315,6 +322,12 @@ try_again:
else if (err == ENODEV && cpu_list) {
die("No such device - did you specify"
" an out-of-range profile CPU?\n");
+ } else if (err == EINVAL && sample_id_all_avail) {
+ /*
+ * Old kernel, no attr->sample_id_type_all field
+ */
+ sample_id_all_avail = false;
+ goto retry_sample_id;
}
/*
@@ -661,6 +674,8 @@ static int __cmd_record(int argc, const char **argv)
post_processing_offset = lseek(output, 0, SEEK_CUR);
+ perf_session__set_sample_id_all(session, sample_id_all_avail);
+
if (pipe_output) {
err = event__synthesize_attrs(&session->header,
process_synthesized_event,
@@ -841,6 +856,7 @@ const struct option record_options[] = {
"per thread counts"),
OPT_BOOLEAN('d', "data", &sample_address,
"Sample addresses"),
+ OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"),
OPT_BOOLEAN('n', "no-samples", &no_samples,
"don't sample"),
OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache,
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 5aa29e1..0515ce9 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -977,12 +977,12 @@ static int symbol_filter(struct map *map, struct symbol *sym)
}
static void event__process_sample(const event_t *self,
- struct perf_session *session, int counter)
+ struct sample_data *sample,
+ struct perf_session *session, int counter)
{
u64 ip = self->ip.ip;
struct sym_entry *syme;
struct addr_location al;
- struct sample_data data;
struct machine *machine;
u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
@@ -1025,9 +1025,7 @@ static void event__process_sample(const event_t *self,
if (self->header.misc & PERF_RECORD_MISC_EXACT_IP)
exact_samples++;
- event__parse_sample(self, session->sample_type, &data);
-
- if (event__preprocess_sample(self, session, &al, &data,
+ if (event__preprocess_sample(self, session, &al, sample,
symbol_filter) < 0 ||
al.filtered)
return;
@@ -1107,6 +1105,7 @@ static void perf_session__mmap_read_counter(struct perf_session *self,
unsigned int head = mmap_read_head(md);
unsigned int old = md->prev;
unsigned char *data = md->base + page_size;
+ struct sample_data sample;
int diff;
/*
@@ -1154,10 +1153,11 @@ static void perf_session__mmap_read_counter(struct perf_session *self,
event = &event_copy;
}
+ event__parse_sample(event, self, &sample);
if (event->header.type == PERF_RECORD_SAMPLE)
- event__process_sample(event, self, md->counter);
+ event__process_sample(event, &sample, self, md->counter);
else
- event__process(event, NULL, self);
+ event__process(event, &sample, self);
old += size;
}
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 34510f4..e4cdc1e 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -33,11 +33,10 @@ static struct sample_data synth_sample = {
.period = 1,
};
-static pid_t event__synthesize_comm(pid_t pid, int full,
+static pid_t event__synthesize_comm(event_t *event, pid_t pid, int full,
event__handler_t process,
struct perf_session *session)
{
- event_t ev;
char filename[PATH_MAX];
char bf[BUFSIZ];
FILE *fp;
@@ -58,34 +57,39 @@ out_race:
return 0;
}
- memset(&ev.comm, 0, sizeof(ev.comm));
- while (!ev.comm.comm[0] || !ev.comm.pid) {
- if (fgets(bf, sizeof(bf), fp) == NULL)
- goto out_failure;
+ memset(&event->comm, 0, sizeof(event->comm));
+
+ while (!event->comm.comm[0] || !event->comm.pid) {
+ if (fgets(bf, sizeof(bf), fp) == NULL) {
+ pr_warning("couldn't get COMM and pgid, malformed %s\n", filename);
+ goto out;
+ }
if (memcmp(bf, "Name:", 5) == 0) {
char *name = bf + 5;
while (*name && isspace(*name))
++name;
size = strlen(name) - 1;
- memcpy(ev.comm.comm, name, size++);
+ memcpy(event->comm.comm, name, size++);
} else if (memcmp(bf, "Tgid:", 5) == 0) {
char *tgids = bf + 5;
while (*tgids && isspace(*tgids))
++tgids;
- tgid = ev.comm.pid = atoi(tgids);
+ tgid = event->comm.pid = atoi(tgids);
}
}
- ev.comm.header.type = PERF_RECORD_COMM;
+ event->comm.header.type = PERF_RECORD_COMM;
size = ALIGN(size, sizeof(u64));
- ev.comm.header.size = sizeof(ev.comm) - (sizeof(ev.comm.comm) - size);
-
+ memset(event->comm.comm + size, 0, session->id_hdr_size);
+ event->comm.header.size = (sizeof(event->comm) -
+ (sizeof(event->comm.comm) - size) +
+ session->id_hdr_size);
if (!full) {
- ev.comm.tid = pid;
+ event->comm.tid = pid;
- process(&ev, &synth_sample, session);
- goto out_fclose;
+ process(event, &synth_sample, session);
+ goto out;
}
snprintf(filename, sizeof(filename), "/proc/%d/task", pid);
@@ -100,22 +104,19 @@ out_race:
if (*end)
continue;
- ev.comm.tid = pid;
+ event->comm.tid = pid;
- process(&ev, &synth_sample, session);
+ process(event, &synth_sample, session);
}
- closedir(tasks);
-out_fclose:
+ closedir(tasks);
+out:
fclose(fp);
- return tgid;
-out_failure:
- pr_warning("couldn't get COMM and pgid, malformed %s\n", filename);
- return -1;
+ return tgid;
}
-static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
+static int event__synthesize_mmap_events(event_t *event, pid_t pid, pid_t tgid,
event__handler_t process,
struct perf_session *session)
{
@@ -133,29 +134,25 @@ static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
return -1;
}
+ event->header.type = PERF_RECORD_MMAP;
+ /*
+ * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c
+ */
+ event->header.misc = PERF_RECORD_MISC_USER;
+
while (1) {
char bf[BUFSIZ], *pbf = bf;
- event_t ev = {
- .header = {
- .type = PERF_RECORD_MMAP,
- /*
- * Just like the kernel, see __perf_event_mmap
- * in kernel/perf_event.c
- */
- .misc = PERF_RECORD_MISC_USER,
- },
- };
int n;
size_t size;
if (fgets(bf, sizeof(bf), fp) == NULL)
break;
/* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */
- n = hex2u64(pbf, &ev.mmap.start);
+ n = hex2u64(pbf, &event->mmap.start);
if (n < 0)
continue;
pbf += n + 1;
- n = hex2u64(pbf, &ev.mmap.len);
+ n = hex2u64(pbf, &event->mmap.len);
if (n < 0)
continue;
pbf += n + 3;
@@ -170,19 +167,21 @@ static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
continue;
pbf += 3;
- n = hex2u64(pbf, &ev.mmap.pgoff);
+ n = hex2u64(pbf, &event->mmap.pgoff);
size = strlen(execname);
execname[size - 1] = '\0'; /* Remove \n */
- memcpy(ev.mmap.filename, execname, size);
+ memcpy(event->mmap.filename, execname, size);
size = ALIGN(size, sizeof(u64));
- ev.mmap.len -= ev.mmap.start;
- ev.mmap.header.size = (sizeof(ev.mmap) -
- (sizeof(ev.mmap.filename) - size));
- ev.mmap.pid = tgid;
- ev.mmap.tid = pid;
-
- process(&ev, &synth_sample, session);
+ event->mmap.len -= event->mmap.start;
+ event->mmap.header.size = (sizeof(event->mmap) -
+ (sizeof(event->mmap.filename) - size));
+ memset(event->mmap.filename + size, 0, session->id_hdr_size);
+ event->mmap.header.size += session->id_hdr_size;
+ event->mmap.pid = tgid;
+ event->mmap.tid = pid;
+
+ process(event, &synth_sample, session);
}
}
@@ -196,20 +195,27 @@ int event__synthesize_modules(event__handler_t process,
{
struct rb_node *nd;
struct map_groups *kmaps = &machine->kmaps;
- u16 misc;
+ event_t *event = zalloc(sizeof(event->mmap) + session->id_hdr_size);
+
+ if (event == NULL) {
+ pr_debug("Not enough memory synthesizing mmap event "
+ "for kernel modules\n");
+ return -1;
+ }
+
+ event->header.type = PERF_RECORD_MMAP;
/*
* kernel uses 0 for user space maps, see kernel/perf_event.c
* __perf_event_mmap
*/
if (machine__is_host(machine))
- misc = PERF_RECORD_MISC_KERNEL;
+ event->header.misc = PERF_RECORD_MISC_KERNEL;
else
- misc = PERF_RECORD_MISC_GUEST_KERNEL;
+ event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
for (nd = rb_first(&kmaps->maps[MAP__FUNCTION]);
nd; nd = rb_next(nd)) {
- event_t ev;
size_t size;
struct map *pos = rb_entry(nd, struct map, rb_node);
@@ -217,39 +223,78 @@ int event__synthesize_modules(event__handler_t process,
continue;
size = ALIGN(pos->dso->long_name_len + 1, sizeof(u64));
- memset(&ev, 0, sizeof(ev));
- ev.mmap.header.misc = misc;
- ev.mmap.header.type = PERF_RECORD_MMAP;
- ev.mmap.header.size = (sizeof(ev.mmap) -
- (sizeof(ev.mmap.filename) - size));
- ev.mmap.start = pos->start;
- ev.mmap.len = pos->end - pos->start;
- ev.mmap.pid = machine->pid;
-
- memcpy(ev.mmap.filename, pos->dso->long_name,
+ event->mmap.header.type = PERF_RECORD_MMAP;
+ event->mmap.header.size = (sizeof(event->mmap) -
+ (sizeof(event->mmap.filename) - size));
+ memset(event->mmap.filename + size, 0, session->id_hdr_size);
+ event->mmap.header.size += session->id_hdr_size;
+ event->mmap.start = pos->start;
+ event->mmap.len = pos->end - pos->start;
+ event->mmap.pid = machine->pid;
+
+ memcpy(event->mmap.filename, pos->dso->long_name,
pos->dso->long_name_len + 1);
- process(&ev, &synth_sample, session);
+ process(event, &synth_sample, session);
}
+ free(event);
return 0;
}
-int event__synthesize_thread(pid_t pid, event__handler_t process,
- struct perf_session *session)
+static int __event__synthesize_thread(event_t *comm_event, event_t *mmap_event,
+ pid_t pid, event__handler_t process,
+ struct perf_session *session)
{
- pid_t tgid = event__synthesize_comm(pid, 1, process, session);
+ pid_t tgid = event__synthesize_comm(comm_event, pid, 1, process,
+ session);
if (tgid == -1)
return -1;
- return event__synthesize_mmap_events(pid, tgid, process, session);
+ return event__synthesize_mmap_events(mmap_event, pid, tgid,
+ process, session);
}
-void event__synthesize_threads(event__handler_t process,
- struct perf_session *session)
+int event__synthesize_thread(pid_t pid, event__handler_t process,
+ struct perf_session *session)
+{
+ event_t *comm_event, *mmap_event;
+ int err = -1;
+
+ comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size);
+ if (comm_event == NULL)
+ goto out;
+
+ mmap_event = malloc(sizeof(mmap_event->mmap) + session->id_hdr_size);
+ if (mmap_event == NULL)
+ goto out_free_comm;
+
+ err = __event__synthesize_thread(comm_event, mmap_event, pid,
+ process, session);
+ free(mmap_event);
+out_free_comm:
+ free(comm_event);
+out:
+ return err;
+}
+
+int event__synthesize_threads(event__handler_t process,
+ struct perf_session *session)
{
DIR *proc;
struct dirent dirent, *next;
+ event_t *comm_event, *mmap_event;
+ int err = -1;
+
+ comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size);
+ if (comm_event == NULL)
+ goto out;
+
+ mmap_event = malloc(sizeof(mmap_event->mmap) + session->id_hdr_size);
+ if (mmap_event == NULL)
+ goto out_free_comm;
proc = opendir("/proc");
+ if (proc == NULL)
+ goto out_free_mmap;
while (!readdir_r(proc, &dirent, &next) && next) {
char *end;
@@ -258,10 +303,18 @@ void event__synthesize_threads(event__handler_t process,
if (*end) /* only interested in proper numerical dirents */
continue;
- event__synthesize_thread(pid, process, session);
+ __event__synthesize_thread(comm_event, mmap_event, pid,
+ process, session);
}
closedir(proc);
+ err = 0;
+out_free_mmap:
+ free(mmap_event);
+out_free_comm:
+ free(comm_event);
+out:
+ return err;
}
struct process_symbol_args {
@@ -295,18 +348,20 @@ int event__synthesize_kernel_mmap(event__handler_t process,
char path[PATH_MAX];
char name_buff[PATH_MAX];
struct map *map;
-
- event_t ev = {
- .header = {
- .type = PERF_RECORD_MMAP,
- },
- };
+ int err;
/*
* We should get this from /sys/kernel/sections/.text, but till that is
* available use this, and after it is use this as a fallback for older
* kernels.
*/
struct process_symbol_args args = { .name = symbol_name, };
+ event_t *event = zalloc(sizeof(event->mmap) + session->id_hdr_size);
+
+ if (event == NULL) {
+ pr_debug("Not enough memory synthesizing mmap event "
+ "for kernel modules\n");
+ return -1;
+ }
mmap_name = machine__mmap_name(machine, name_buff, sizeof(name_buff));
if (machine__is_host(machine)) {
@@ -314,10 +369,10 @@ int event__synthesize_kernel_mmap(event__handler_t process,
* kernel uses PERF_RECORD_MISC_USER for user space maps,
* see kernel/perf_event.c __perf_event_mmap
*/
- ev.header.misc = PERF_RECORD_MISC_KERNEL;
+ event->header.misc = PERF_RECORD_MISC_KERNEL;
filename = "/proc/kallsyms";
} else {
- ev.header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
+ event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
if (machine__is_default_guest(machine))
filename = (char *) symbol_conf.default_guest_kallsyms;
else {
@@ -330,17 +385,21 @@ int event__synthesize_kernel_mmap(event__handler_t process,
return -ENOENT;
map = machine->vmlinux_maps[MAP__FUNCTION];
- size = snprintf(ev.mmap.filename, sizeof(ev.mmap.filename),
+ size = snprintf(event->mmap.filename, sizeof(event->mmap.filename),
"%s%s", mmap_name, symbol_name) + 1;
size = ALIGN(size, sizeof(u64));
- ev.mmap.header.size = (sizeof(ev.mmap) -
- (sizeof(ev.mmap.filename) - size));
- ev.mmap.pgoff = args.start;
- ev.mmap.start = map->start;
- ev.mmap.len = map->end - ev.mmap.start;
- ev.mmap.pid = machine->pid;
-
- return process(&ev, &synth_sample, session);
+ event->mmap.header.type = PERF_RECORD_MMAP;
+ event->mmap.header.size = (sizeof(event->mmap) -
+ (sizeof(event->mmap.filename) - size) + session->id_hdr_size);
+ event->mmap.pgoff = args.start;
+ event->mmap.start = map->start;
+ event->mmap.len = map->end - event->mmap.start;
+ event->mmap.pid = machine->pid;
+
+ err = process(event, &synth_sample, session);
+ free(event);
+
+ return err;
}
static void thread__comm_adjust(struct thread *self, struct hists *hists)
@@ -756,9 +815,65 @@ out_filtered:
return 0;
}
-int event__parse_sample(const event_t *event, u64 type, struct sample_data *data)
+static int event__parse_id_sample(const event_t *event,
+ struct perf_session *session,
+ struct sample_data *sample)
{
- const u64 *array = event->sample.array;
+ const u64 *array;
+ u64 type;
+
+ sample->cpu = sample->pid = sample->tid = -1;
+ sample->stream_id = sample->id = sample->time = -1ULL;
+
+ if (!session->sample_id_all)
+ return 0;
+
+ array = event->sample.array;
+ array += ((event->header.size -
+ sizeof(event->header)) / sizeof(u64)) - 1;
+ type = session->sample_type;
+
+ if (type & PERF_SAMPLE_CPU) {
+ u32 *p = (u32 *)array;
+ sample->cpu = *p;
+ array--;
+ }
+
+ if (type & PERF_SAMPLE_STREAM_ID) {
+ sample->stream_id = *array;
+ array--;
+ }
+
+ if (type & PERF_SAMPLE_ID) {
+ sample->id = *array;
+ array--;
+ }
+
+ if (type & PERF_SAMPLE_TIME) {
+ sample->time = *array;
+ array--;
+ }
+
+ if (type & PERF_SAMPLE_TID) {
+ u32 *p = (u32 *)array;
+ sample->pid = p[0];
+ sample->tid = p[1];
+ }
+
+ return 0;
+}
+
+int event__parse_sample(const event_t *event, struct perf_session *session,
+ struct sample_data *data)
+{
+ const u64 *array;
+ u64 type;
+
+ if (event->header.type != PERF_RECORD_SAMPLE)
+ return event__parse_id_sample(event, session, data);
+
+ array = event->sample.array;
+ type = session->sample_type;
if (type & PERF_SAMPLE_IP) {
data->ip = event->ip.ip;
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 08c400b..a95ab18 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -142,8 +142,8 @@ typedef int (*event__handler_t)(event_t *event, struct sample_data *sample,
int event__synthesize_thread(pid_t pid, event__handler_t process,
struct perf_session *session);
-void event__synthesize_threads(event__handler_t process,
- struct perf_session *session);
+int event__synthesize_threads(event__handler_t process,
+ struct perf_session *session);
int event__synthesize_kernel_mmap(event__handler_t process,
struct perf_session *session,
struct machine *machine,
@@ -168,7 +168,8 @@ struct addr_location;
int event__preprocess_sample(const event_t *self, struct perf_session *session,
struct addr_location *al, struct sample_data *data,
symbol_filter_t filter);
-int event__parse_sample(const event_t *event, u64 type, struct sample_data *data);
+int event__parse_sample(const event_t *event, struct perf_session *session,
+ struct sample_data *sample);
extern const char *event__name[];
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index fe652f3..073f0e1 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -946,6 +946,24 @@ u64 perf_header__sample_type(struct perf_header *header)
return type;
}
+bool perf_header__sample_id_all(const struct perf_header *header)
+{
+ bool value = false, first = true;
+ int i;
+
+ for (i = 0; i < header->attrs; i++) {
+ struct perf_header_attr *attr = header->attr[i];
+
+ if (first) {
+ value = attr->attr.sample_id_all;
+ first = false;
+ } else if (value != attr->attr.sample_id_all)
+ die("non matching sample_id_all");
+ }
+
+ return value;
+}
+
struct perf_event_attr *
perf_header__find_attr(u64 id, struct perf_header *header)
{
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index ed550bf..6335965 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -81,6 +81,7 @@ void perf_header_attr__delete(struct perf_header_attr *self);
int perf_header_attr__add_id(struct perf_header_attr *self, u64 id);
u64 perf_header__sample_type(struct perf_header *header);
+bool perf_header__sample_id_all(const struct perf_header *header);
struct perf_event_attr *
perf_header__find_attr(u64 id, struct perf_header *header);
void perf_header__set_feat(struct perf_header *self, int feat);
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 08ec018..5c75660 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -65,9 +65,37 @@ out_close:
return -1;
}
-void perf_session__update_sample_type(struct perf_session *self)
+static void perf_session__id_header_size(struct perf_session *session)
{
- self->sample_type = perf_header__sample_type(&self->header);
+ struct sample_data *data;
+ u64 sample_type = session->sample_type;
+ u16 size = 0;
+
+ if (!session->sample_id_all)
+ goto out;
+
+ if (sample_type & PERF_SAMPLE_TID)
+ size += sizeof(data->tid) * 2;
+
+ if (sample_type & PERF_SAMPLE_TIME)
+ size += sizeof(data->time);
+
+ if (sample_type & PERF_SAMPLE_ID)
+ size += sizeof(data->id);
+
+ if (sample_type & PERF_SAMPLE_STREAM_ID)
+ size += sizeof(data->stream_id);
+
+ if (sample_type & PERF_SAMPLE_CPU)
+ size += sizeof(data->cpu) * 2;
+out:
+ session->id_hdr_size = size;
+}
+
+void perf_session__set_sample_id_all(struct perf_session *session, bool value)
+{
+ session->sample_id_all = value;
+ perf_session__id_header_size(session);
}
void perf_session__set_sample_type(struct perf_session *session, u64 type)
@@ -75,6 +103,13 @@ void perf_session__set_sample_type(struct perf_session *session, u64 type)
session->sample_type = type;
}
+void perf_session__update_sample_type(struct perf_session *self)
+{
+ self->sample_type = perf_header__sample_type(&self->header);
+ self->sample_id_all = perf_header__sample_id_all(&self->header);
+ perf_session__id_header_size(self);
+}
+
int perf_session__create_kernel_maps(struct perf_session *self)
{
int ret = machine__create_kernel_maps(&self->host_machine);
@@ -443,7 +478,7 @@ static void flush_sample_queue(struct perf_session *s,
if (iter->timestamp > limit)
break;
- event__parse_sample(iter->event, s->sample_type, &sample);
+ event__parse_sample(iter->event, s, &sample);
ops->sample(iter->event, &sample, s);
os->last_flush = iter->timestamp;
@@ -618,6 +653,23 @@ static void callchain__dump(struct sample_data *sample)
printf("..... %2d: %016Lx\n", i, sample->callchain->ips[i]);
}
+static void perf_session__print_tstamp(struct perf_session *session,
+ event_t *event,
+ struct sample_data *sample)
+{
+ if (event->header.type != PERF_RECORD_SAMPLE &&
+ !session->sample_id_all) {
+ fputs("-1 -1 ", stdout);
+ return;
+ }
+
+ if ((session->sample_type & PERF_SAMPLE_CPU))
+ printf("%u ", sample->cpu);
+
+ if (session->sample_type & PERF_SAMPLE_TIME)
+ printf("%Lu ", sample->time);
+}
+
static int perf_session__process_event(struct perf_session *self,
event_t *event,
struct perf_event_ops *ops,
@@ -630,8 +682,12 @@ static int perf_session__process_event(struct perf_session *self,
if (self->header.needs_swap && event__swap_ops[event->header.type])
event__swap_ops[event->header.type](event);
- if (event->header.type == PERF_RECORD_SAMPLE)
- event__parse_sample(event, self->sample_type, &sample);
+ if (event->header.type >= PERF_RECORD_MMAP &&
+ event->header.type <= PERF_RECORD_SAMPLE) {
+ event__parse_sample(event, self, &sample);
+ if (dump_trace)
+ perf_session__print_tstamp(self, event, &sample);
+ }
if (event->header.type < PERF_RECORD_HEADER_MAX) {
dump_printf("%#Lx [%#x]: PERF_RECORD_%s",
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 4578f86..ac36f99 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -46,6 +46,8 @@ struct perf_session {
int fd;
bool fd_pipe;
bool repipe;
+ bool sample_id_all;
+ u16 id_hdr_size;
int cwdlen;
char *cwd;
struct ordered_samples ordered_samples;
@@ -106,6 +108,7 @@ int perf_session__create_kernel_maps(struct perf_session *self);
int do_read(int fd, void *buf, size_t size);
void perf_session__update_sample_type(struct perf_session *self);
+void perf_session__set_sample_id_all(struct perf_session *session, bool value);
void perf_session__set_sample_type(struct perf_session *session, u64 type);
void perf_session__remove_thread(struct perf_session *self, struct thread *th);
--
1.6.2.5
From: Arnaldo Carvalho de Melo <[email protected]>
At perf_session__process_event, so that we reduce the number of lines in each
tool sample processing routine that now receives a sample_data pointer already
parsed.
This will also be useful in the next patch, where we'll allow sampling the
identity fields in MMAP, FORK, EXIT, etc, when it will be possible to see (cpu,
timestamp) just before every event.
Also validate callchains in perf_session__process_event, i.e. as early as
possible, and keep a counter of the number of events discarded due to invalid
callchains, warning the user about it if it happens.
There is an assumption that was kept that all events have the same sample_type,
that will be dealt with in the future, when this preexisting limitation will be
removed.
Tested-by: Thomas Gleixner <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Acked-by: Ian Munsie <[email protected]>
Acked-by: Thomas Gleixner <[email protected]>
Cc: Frédéric Weisbecker <[email protected]>
Cc: Ian Munsie <[email protected]>
Cc: Mike Galbraith <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Stephane Eranian <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/builtin-annotate.c | 6 +-
tools/perf/builtin-diff.c | 11 ++--
tools/perf/builtin-inject.c | 39 +++++++++------
tools/perf/builtin-kmem.c | 21 ++------
tools/perf/builtin-lock.c | 12 ++---
tools/perf/builtin-record.c | 7 +++
tools/perf/builtin-report.c | 15 +++---
tools/perf/builtin-sched.c | 21 ++------
tools/perf/builtin-script.c | 34 +++++---------
tools/perf/builtin-timechart.c | 40 ++++++++--------
tools/perf/builtin-top.c | 4 +-
tools/perf/util/build-id.c | 7 ++-
tools/perf/util/event.c | 66 +++++++++++---------------
tools/perf/util/event.h | 20 +++++--
tools/perf/util/header.c | 11 ++--
tools/perf/util/hist.h | 2 +
tools/perf/util/session.c | 103 ++++++++++++++++++++++++++++++----------
tools/perf/util/session.h | 9 ++-
18 files changed, 236 insertions(+), 192 deletions(-)
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 6d5604d..569a276 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -58,12 +58,12 @@ static int hists__add_entry(struct hists *self, struct addr_location *al)
return hist_entry__inc_addr_samples(he, al->addr);
}
-static int process_sample_event(event_t *event, struct perf_session *session)
+static int process_sample_event(event_t *event, struct sample_data *sample,
+ struct perf_session *session)
{
struct addr_location al;
- struct sample_data data;
- if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) {
+ if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
pr_warning("problem processing %d event, skipping it.\n",
event->header.type);
return -1;
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 221b823..5e1a043 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -30,12 +30,13 @@ static int hists__add_entry(struct hists *self,
return -ENOMEM;
}
-static int diff__process_sample_event(event_t *event, struct perf_session *session)
+static int diff__process_sample_event(event_t *event,
+ struct sample_data *sample,
+ struct perf_session *session)
{
struct addr_location al;
- struct sample_data data = { .period = 1, };
- if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) {
+ if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
pr_warning("problem processing %d event, skipping it.\n",
event->header.type);
return -1;
@@ -44,12 +45,12 @@ static int diff__process_sample_event(event_t *event, struct perf_session *sessi
if (al.filtered || al.sym == NULL)
return 0;
- if (hists__add_entry(&session->hists, &al, data.period)) {
+ if (hists__add_entry(&session->hists, &al, sample->period)) {
pr_warning("problem incrementing symbol period, skipping event\n");
return -1;
}
- session->hists.stats.total_period += data.period;
+ session->hists.stats.total_period += sample->period;
return 0;
}
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 8e3e47b..4b66b85 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -16,8 +16,8 @@
static char const *input_name = "-";
static bool inject_build_ids;
-static int event__repipe(event_t *event __used,
- struct perf_session *session __used)
+static int event__repipe_synth(event_t *event,
+ struct perf_session *session __used)
{
uint32_t size;
void *buf = event;
@@ -36,22 +36,30 @@ static int event__repipe(event_t *event __used,
return 0;
}
-static int event__repipe_mmap(event_t *self, struct perf_session *session)
+static int event__repipe(event_t *event, struct sample_data *sample __used,
+ struct perf_session *session)
+{
+ return event__repipe_synth(event, session);
+}
+
+static int event__repipe_mmap(event_t *self, struct sample_data *sample,
+ struct perf_session *session)
{
int err;
- err = event__process_mmap(self, session);
- event__repipe(self, session);
+ err = event__process_mmap(self, sample, session);
+ event__repipe(self, sample, session);
return err;
}
-static int event__repipe_task(event_t *self, struct perf_session *session)
+static int event__repipe_task(event_t *self, struct sample_data *sample,
+ struct perf_session *session)
{
int err;
- err = event__process_task(self, session);
- event__repipe(self, session);
+ err = event__process_task(self, sample, session);
+ event__repipe(self, sample, session);
return err;
}
@@ -61,7 +69,7 @@ static int event__repipe_tracing_data(event_t *self,
{
int err;
- event__repipe(self, session);
+ event__repipe_synth(self, session);
err = event__process_tracing_data(self, session);
return err;
@@ -111,7 +119,8 @@ static int dso__inject_build_id(struct dso *self, struct perf_session *session)
return 0;
}
-static int event__inject_buildid(event_t *event, struct perf_session *session)
+static int event__inject_buildid(event_t *event, struct sample_data *sample,
+ struct perf_session *session)
{
struct addr_location al;
struct thread *thread;
@@ -146,7 +155,7 @@ static int event__inject_buildid(event_t *event, struct perf_session *session)
}
repipe:
- event__repipe(event, session);
+ event__repipe(event, sample, session);
return 0;
}
@@ -160,10 +169,10 @@ struct perf_event_ops inject_ops = {
.read = event__repipe,
.throttle = event__repipe,
.unthrottle = event__repipe,
- .attr = event__repipe,
- .event_type = event__repipe,
- .tracing_data = event__repipe,
- .build_id = event__repipe,
+ .attr = event__repipe_synth,
+ .event_type = event__repipe_synth,
+ .tracing_data = event__repipe_synth,
+ .build_id = event__repipe_synth,
};
extern volatile int session_done;
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 31f60a2..d0a652e 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -304,22 +304,11 @@ process_raw_event(event_t *raw_event __used, void *data,
}
}
-static int process_sample_event(event_t *event, struct perf_session *session)
+static int process_sample_event(event_t *event, struct sample_data *sample,
+ struct perf_session *session)
{
- struct sample_data data;
- struct thread *thread;
+ struct thread *thread = perf_session__findnew(session, event->ip.pid);
- memset(&data, 0, sizeof(data));
- data.time = -1;
- data.cpu = -1;
- data.period = 1;
-
- event__parse_sample(event, session->sample_type, &data);
-
- dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
- data.pid, data.tid, data.ip, data.period);
-
- thread = perf_session__findnew(session, event->ip.pid);
if (thread == NULL) {
pr_debug("problem processing %d event, skipping it.\n",
event->header.type);
@@ -328,8 +317,8 @@ static int process_sample_event(event_t *event, struct perf_session *session)
dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
- process_raw_event(event, data.raw_data, data.cpu,
- data.time, thread);
+ process_raw_event(event, sample->raw_data, sample->cpu,
+ sample->time, thread);
return 0;
}
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 8452a2a..92d3da5 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -834,22 +834,18 @@ static void dump_info(void)
die("Unknown type of information\n");
}
-static int process_sample_event(event_t *self, struct perf_session *s)
+static int process_sample_event(event_t *self, struct sample_data *sample,
+ struct perf_session *s)
{
- struct sample_data data;
- struct thread *thread;
+ struct thread *thread = perf_session__findnew(s, sample->tid);
- bzero(&data, sizeof(data));
- event__parse_sample(self, s->sample_type, &data);
-
- thread = perf_session__findnew(s, data.tid);
if (thread == NULL) {
pr_debug("problem processing %d event, skipping it.\n",
self->header.type);
return -1;
}
- process_raw_event(data.raw_data, data.cpu, data.time, thread);
+ process_raw_event(sample->raw_data, sample->cpu, sample->time, thread);
return 0;
}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 024e144..b34de92 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -36,6 +36,7 @@ static int *fd[MAX_NR_CPUS][MAX_COUNTERS];
static u64 user_interval = ULLONG_MAX;
static u64 default_interval = 0;
+static u64 sample_type;
static int nr_cpus = 0;
static unsigned int page_size;
@@ -129,6 +130,7 @@ static void write_output(void *buf, size_t size)
}
static int process_synthesized_event(event_t *event,
+ struct sample_data *sample __used,
struct perf_session *self __used)
{
write_output(event, event->header.size);
@@ -287,6 +289,9 @@ static void create_counter(int counter, int cpu)
attr->sample_type |= PERF_SAMPLE_CPU;
}
+ if (!sample_type)
+ sample_type = attr->sample_type;
+
attr->mmap = track;
attr->comm = track;
attr->inherit = !no_inherit;
@@ -642,6 +647,8 @@ static int __cmd_record(int argc, const char **argv)
open_counters(cpumap[i]);
}
+ perf_session__set_sample_type(session, sample_type);
+
if (pipe_output) {
err = perf_header__write_pipe(output);
if (err < 0)
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 5de405d..904519f 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -150,13 +150,13 @@ static int add_event_total(struct perf_session *session,
return 0;
}
-static int process_sample_event(event_t *event, struct perf_session *session)
+static int process_sample_event(event_t *event, struct sample_data *sample,
+ struct perf_session *session)
{
- struct sample_data data = { .period = 1, };
struct addr_location al;
struct perf_event_attr *attr;
- if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) {
+ if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
fprintf(stderr, "problem processing %d event, skipping it.\n",
event->header.type);
return -1;
@@ -165,14 +165,14 @@ static int process_sample_event(event_t *event, struct perf_session *session)
if (al.filtered || (hide_unresolved && al.sym == NULL))
return 0;
- if (perf_session__add_hist_entry(session, &al, &data)) {
+ if (perf_session__add_hist_entry(session, &al, sample)) {
pr_debug("problem incrementing symbol period, skipping event\n");
return -1;
}
- attr = perf_header__find_attr(data.id, &session->header);
+ attr = perf_header__find_attr(sample->id, &session->header);
- if (add_event_total(session, &data, attr)) {
+ if (add_event_total(session, sample, attr)) {
pr_debug("problem adding event period\n");
return -1;
}
@@ -180,7 +180,8 @@ static int process_sample_event(event_t *event, struct perf_session *session)
return 0;
}
-static int process_read_event(event_t *event, struct perf_session *session __used)
+static int process_read_event(event_t *event, struct sample_data *sample __used,
+ struct perf_session *session __used)
{
struct perf_event_attr *attr;
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 26523c9..73d1e30 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1606,25 +1606,15 @@ process_raw_event(event_t *raw_event __used, struct perf_session *session,
process_sched_migrate_task_event(data, session, event, cpu, timestamp, thread);
}
-static int process_sample_event(event_t *event, struct perf_session *session)
+static int process_sample_event(event_t *event, struct sample_data *sample,
+ struct perf_session *session)
{
- struct sample_data data;
struct thread *thread;
if (!(session->sample_type & PERF_SAMPLE_RAW))
return 0;
- memset(&data, 0, sizeof(data));
- data.time = -1;
- data.cpu = -1;
- data.period = -1;
-
- event__parse_sample(event, session->sample_type, &data);
-
- dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
- data.pid, data.tid, data.ip, data.period);
-
- thread = perf_session__findnew(session, data.pid);
+ thread = perf_session__findnew(session, sample->pid);
if (thread == NULL) {
pr_debug("problem processing %d event, skipping it.\n",
event->header.type);
@@ -1633,10 +1623,11 @@ static int process_sample_event(event_t *event, struct perf_session *session)
dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
- if (profile_cpu != -1 && profile_cpu != (int)data.cpu)
+ if (profile_cpu != -1 && profile_cpu != (int)sample->cpu)
return 0;
- process_raw_event(event, session, data.raw_data, data.cpu, data.time, thread);
+ process_raw_event(event, session, sample->raw_data, sample->cpu,
+ sample->time, thread);
return 0;
}
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 4539551..683a305 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -63,22 +63,11 @@ static int cleanup_scripting(void)
static char const *input_name = "perf.data";
-static int process_sample_event(event_t *event, struct perf_session *session)
+static int process_sample_event(event_t *event, struct sample_data *sample,
+ struct perf_session *session)
{
- struct sample_data data;
- struct thread *thread;
+ struct thread *thread = perf_session__findnew(session, event->ip.pid);
- memset(&data, 0, sizeof(data));
- data.time = -1;
- data.cpu = -1;
- data.period = 1;
-
- event__parse_sample(event, session->sample_type, &data);
-
- dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
- data.pid, data.tid, data.ip, data.period);
-
- thread = perf_session__findnew(session, event->ip.pid);
if (thread == NULL) {
pr_debug("problem processing %d event, skipping it.\n",
event->header.type);
@@ -87,13 +76,13 @@ static int process_sample_event(event_t *event, struct perf_session *session)
if (session->sample_type & PERF_SAMPLE_RAW) {
if (debug_mode) {
- if (data.time < last_timestamp) {
+ if (sample->time < last_timestamp) {
pr_err("Samples misordered, previous: %llu "
"this: %llu\n", last_timestamp,
- data.time);
+ sample->time);
nr_unordered++;
}
- last_timestamp = data.time;
+ last_timestamp = sample->time;
return 0;
}
/*
@@ -101,18 +90,19 @@ static int process_sample_event(event_t *event, struct perf_session *session)
* field, although it should be the same than this perf
* event pid
*/
- scripting_ops->process_event(data.cpu, data.raw_data,
- data.raw_size,
- data.time, thread->comm);
+ scripting_ops->process_event(sample->cpu, sample->raw_data,
+ sample->raw_size,
+ sample->time, thread->comm);
}
- session->hists.stats.total_period += data.period;
+ session->hists.stats.total_period += sample->period;
return 0;
}
static u64 nr_lost;
-static int process_lost_event(event_t *event, struct perf_session *session __used)
+static int process_lost_event(event_t *event, struct sample_data *sample __used,
+ struct perf_session *session __used)
{
nr_lost += event->lost.lost;
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 9bcc38f..1f158dc 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -272,19 +272,22 @@ static int cpus_cstate_state[MAX_CPUS];
static u64 cpus_pstate_start_times[MAX_CPUS];
static u64 cpus_pstate_state[MAX_CPUS];
-static int process_comm_event(event_t *event, struct perf_session *session __used)
+static int process_comm_event(event_t *event, struct sample_data *sample __used,
+ struct perf_session *session __used)
{
pid_set_comm(event->comm.tid, event->comm.comm);
return 0;
}
-static int process_fork_event(event_t *event, struct perf_session *session __used)
+static int process_fork_event(event_t *event, struct sample_data *sample __used,
+ struct perf_session *session __used)
{
pid_fork(event->fork.pid, event->fork.ppid, event->fork.time);
return 0;
}
-static int process_exit_event(event_t *event, struct perf_session *session __used)
+static int process_exit_event(event_t *event, struct sample_data *sample __used,
+ struct perf_session *session __used)
{
pid_exit(event->fork.pid, event->fork.time);
return 0;
@@ -470,24 +473,21 @@ static void sched_switch(int cpu, u64 timestamp, struct trace_entry *te)
}
-static int process_sample_event(event_t *event, struct perf_session *session)
+static int process_sample_event(event_t *event __used,
+ struct sample_data *sample,
+ struct perf_session *session)
{
- struct sample_data data;
struct trace_entry *te;
- memset(&data, 0, sizeof(data));
-
- event__parse_sample(event, session->sample_type, &data);
-
if (session->sample_type & PERF_SAMPLE_TIME) {
- if (!first_time || first_time > data.time)
- first_time = data.time;
- if (last_time < data.time)
- last_time = data.time;
+ if (!first_time || first_time > sample->time)
+ first_time = sample->time;
+ if (last_time < sample->time)
+ last_time = sample->time;
}
- te = (void *)data.raw_data;
- if (session->sample_type & PERF_SAMPLE_RAW && data.raw_size > 0) {
+ te = (void *)sample->raw_data;
+ if (session->sample_type & PERF_SAMPLE_RAW && sample->raw_size > 0) {
char *event_str;
struct power_entry *pe;
@@ -499,19 +499,19 @@ static int process_sample_event(event_t *event, struct perf_session *session)
return 0;
if (strcmp(event_str, "power:power_start") == 0)
- c_state_start(pe->cpu_id, data.time, pe->value);
+ c_state_start(pe->cpu_id, sample->time, pe->value);
if (strcmp(event_str, "power:power_end") == 0)
- c_state_end(pe->cpu_id, data.time);
+ c_state_end(pe->cpu_id, sample->time);
if (strcmp(event_str, "power:power_frequency") == 0)
- p_state_change(pe->cpu_id, data.time, pe->value);
+ p_state_change(pe->cpu_id, sample->time, pe->value);
if (strcmp(event_str, "sched:sched_wakeup") == 0)
- sched_wakeup(data.cpu, data.time, data.pid, te);
+ sched_wakeup(sample->cpu, sample->time, sample->pid, te);
if (strcmp(event_str, "sched:sched_switch") == 0)
- sched_switch(data.cpu, data.time, te);
+ sched_switch(sample->cpu, sample->time, te);
}
return 0;
}
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 3d2b47d..5aa29e1 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1025,6 +1025,8 @@ static void event__process_sample(const event_t *self,
if (self->header.misc & PERF_RECORD_MISC_EXACT_IP)
exact_samples++;
+ event__parse_sample(self, session->sample_type, &data);
+
if (event__preprocess_sample(self, session, &al, &data,
symbol_filter) < 0 ||
al.filtered)
@@ -1155,7 +1157,7 @@ static void perf_session__mmap_read_counter(struct perf_session *self,
if (event->header.type == PERF_RECORD_SAMPLE)
event__process_sample(event, self, md->counter);
else
- event__process(event, self);
+ event__process(event, NULL, self);
old += size;
}
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index e437edb..deffb8c 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -14,7 +14,9 @@
#include <linux/kernel.h>
#include "debug.h"
-static int build_id__mark_dso_hit(event_t *event, struct perf_session *session)
+static int build_id__mark_dso_hit(event_t *event,
+ struct sample_data *sample __used,
+ struct perf_session *session)
{
struct addr_location al;
u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
@@ -35,7 +37,8 @@ static int build_id__mark_dso_hit(event_t *event, struct perf_session *session)
return 0;
}
-static int event__exit_del_thread(event_t *self, struct perf_session *session)
+static int event__exit_del_thread(event_t *self, struct sample_data *sample __used,
+ struct perf_session *session)
{
struct thread *thread = perf_session__findnew(session, self->fork.tid);
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 7260db7..34510f4 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -24,6 +24,15 @@ const char *event__name[] = {
[PERF_RECORD_HEADER_BUILD_ID] = "BUILD_ID",
};
+static struct sample_data synth_sample = {
+ .pid = -1,
+ .tid = -1,
+ .time = -1,
+ .stream_id = -1,
+ .cpu = -1,
+ .period = 1,
+};
+
static pid_t event__synthesize_comm(pid_t pid, int full,
event__handler_t process,
struct perf_session *session)
@@ -75,7 +84,7 @@ out_race:
if (!full) {
ev.comm.tid = pid;
- process(&ev, session);
+ process(&ev, &synth_sample, session);
goto out_fclose;
}
@@ -93,7 +102,7 @@ out_race:
ev.comm.tid = pid;
- process(&ev, session);
+ process(&ev, &synth_sample, session);
}
closedir(tasks);
@@ -173,7 +182,7 @@ static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
ev.mmap.pid = tgid;
ev.mmap.tid = pid;
- process(&ev, session);
+ process(&ev, &synth_sample, session);
}
}
@@ -219,7 +228,7 @@ int event__synthesize_modules(event__handler_t process,
memcpy(ev.mmap.filename, pos->dso->long_name,
pos->dso->long_name_len + 1);
- process(&ev, session);
+ process(&ev, &synth_sample, session);
}
return 0;
@@ -331,7 +340,7 @@ int event__synthesize_kernel_mmap(event__handler_t process,
ev.mmap.len = map->end - ev.mmap.start;
ev.mmap.pid = machine->pid;
- return process(&ev, session);
+ return process(&ev, &synth_sample, session);
}
static void thread__comm_adjust(struct thread *self, struct hists *hists)
@@ -361,7 +370,8 @@ static int thread__set_comm_adjust(struct thread *self, const char *comm,
return 0;
}
-int event__process_comm(event_t *self, struct perf_session *session)
+int event__process_comm(event_t *self, struct sample_data *sample __used,
+ struct perf_session *session)
{
struct thread *thread = perf_session__findnew(session, self->comm.tid);
@@ -376,7 +386,8 @@ int event__process_comm(event_t *self, struct perf_session *session)
return 0;
}
-int event__process_lost(event_t *self, struct perf_session *session)
+int event__process_lost(event_t *self, struct sample_data *sample __used,
+ struct perf_session *session)
{
dump_printf(": id:%Ld: lost:%Ld\n", self->lost.id, self->lost.lost);
session->hists.stats.total_lost += self->lost.lost;
@@ -485,7 +496,8 @@ out_problem:
return -1;
}
-int event__process_mmap(event_t *self, struct perf_session *session)
+int event__process_mmap(event_t *self, struct sample_data *sample __used,
+ struct perf_session *session)
{
struct machine *machine;
struct thread *thread;
@@ -526,7 +538,8 @@ out_problem:
return 0;
}
-int event__process_task(event_t *self, struct perf_session *session)
+int event__process_task(event_t *self, struct sample_data *sample __used,
+ struct perf_session *session)
{
struct thread *thread = perf_session__findnew(session, self->fork.tid);
struct thread *parent = perf_session__findnew(session, self->fork.ptid);
@@ -548,18 +561,19 @@ int event__process_task(event_t *self, struct perf_session *session)
return 0;
}
-int event__process(event_t *event, struct perf_session *session)
+int event__process(event_t *event, struct sample_data *sample,
+ struct perf_session *session)
{
switch (event->header.type) {
case PERF_RECORD_COMM:
- event__process_comm(event, session);
+ event__process_comm(event, sample, session);
break;
case PERF_RECORD_MMAP:
- event__process_mmap(event, session);
+ event__process_mmap(event, sample, session);
break;
case PERF_RECORD_FORK:
case PERF_RECORD_EXIT:
- event__process_task(event, session);
+ event__process_task(event, sample, session);
break;
default:
break;
@@ -674,32 +688,8 @@ int event__preprocess_sample(const event_t *self, struct perf_session *session,
symbol_filter_t filter)
{
u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
- struct thread *thread;
-
- event__parse_sample(self, session->sample_type, data);
+ struct thread *thread = perf_session__findnew(session, self->ip.pid);
- dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld cpu:%d\n",
- self->header.misc, data->pid, data->tid, data->ip,
- data->period, data->cpu);
-
- if (session->sample_type & PERF_SAMPLE_CALLCHAIN) {
- unsigned int i;
-
- dump_printf("... chain: nr:%Lu\n", data->callchain->nr);
-
- if (!ip_callchain__valid(data->callchain, self)) {
- pr_debug("call-chain problem with event, "
- "skipping it.\n");
- goto out_filtered;
- }
-
- if (dump_trace) {
- for (i = 0; i < data->callchain->nr; i++)
- dump_printf("..... %2d: %016Lx\n",
- i, data->callchain->ips[i]);
- }
- }
- thread = perf_session__findnew(session, self->ip.pid);
if (thread == NULL)
return -1;
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 8e790da..08c400b 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -135,7 +135,10 @@ void event__print_totals(void);
struct perf_session;
-typedef int (*event__handler_t)(event_t *event, struct perf_session *session);
+typedef int (*event__handler_synth_t)(event_t *event,
+ struct perf_session *session);
+typedef int (*event__handler_t)(event_t *event, struct sample_data *sample,
+ struct perf_session *session);
int event__synthesize_thread(pid_t pid, event__handler_t process,
struct perf_session *session);
@@ -150,11 +153,16 @@ int event__synthesize_modules(event__handler_t process,
struct perf_session *session,
struct machine *machine);
-int event__process_comm(event_t *self, struct perf_session *session);
-int event__process_lost(event_t *self, struct perf_session *session);
-int event__process_mmap(event_t *self, struct perf_session *session);
-int event__process_task(event_t *self, struct perf_session *session);
-int event__process(event_t *event, struct perf_session *session);
+int event__process_comm(event_t *self, struct sample_data *sample,
+ struct perf_session *session);
+int event__process_lost(event_t *self, struct sample_data *sample,
+ struct perf_session *session);
+int event__process_mmap(event_t *self, struct sample_data *sample,
+ struct perf_session *session);
+int event__process_task(event_t *self, struct sample_data *sample,
+ struct perf_session *session);
+int event__process(event_t *event, struct sample_data *sample,
+ struct perf_session *session);
struct addr_location;
int event__preprocess_sample(const event_t *self, struct perf_session *session,
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index f65d7dc..fe652f3 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -993,15 +993,14 @@ int event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id,
ev->attr.header.type = PERF_RECORD_HEADER_ATTR;
ev->attr.header.size = size;
- err = process(ev, session);
+ err = process(ev, NULL, session);
free(ev);
return err;
}
-int event__synthesize_attrs(struct perf_header *self,
- event__handler_t process,
+int event__synthesize_attrs(struct perf_header *self, event__handler_t process,
struct perf_session *session)
{
struct perf_header_attr *attr;
@@ -1071,7 +1070,7 @@ int event__synthesize_event_type(u64 event_id, char *name,
ev.event_type.header.size = sizeof(ev.event_type) -
(sizeof(ev.event_type.event_type.name) - size);
- err = process(&ev, session);
+ err = process(&ev, NULL, session);
return err;
}
@@ -1126,7 +1125,7 @@ int event__synthesize_tracing_data(int fd, struct perf_event_attr *pattrs,
ev.tracing_data.header.size = sizeof(ev.tracing_data);
ev.tracing_data.size = aligned_size;
- process(&ev, session);
+ process(&ev, NULL, session);
err = read_tracing_data(fd, pattrs, nb_events);
write_padded(fd, NULL, 0, padding);
@@ -1186,7 +1185,7 @@ int event__synthesize_build_id(struct dso *pos, u16 misc,
ev.build_id.header.size = sizeof(ev.build_id) + len;
memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len);
- err = process(&ev, session);
+ err = process(&ev, NULL, session);
return err;
}
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 587d375..ee78985 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -52,8 +52,10 @@ struct sym_priv {
struct events_stats {
u64 total_period;
u64 total_lost;
+ u64 total_invalid_chains;
u32 nr_events[PERF_RECORD_HEADER_MAX];
u32 nr_unknown_events;
+ u32 nr_invalid_chains;
};
enum hist_column {
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 52672da..08ec018 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -70,6 +70,11 @@ void perf_session__update_sample_type(struct perf_session *self)
self->sample_type = perf_header__sample_type(&self->header);
}
+void perf_session__set_sample_type(struct perf_session *session, u64 type)
+{
+ session->sample_type = type;
+}
+
int perf_session__create_kernel_maps(struct perf_session *self)
{
int ret = machine__create_kernel_maps(&self->host_machine);
@@ -240,7 +245,15 @@ struct map_symbol *perf_session__resolve_callchain(struct perf_session *self,
return syms;
}
+static int process_event_synth_stub(event_t *event __used,
+ struct perf_session *session __used)
+{
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
static int process_event_stub(event_t *event __used,
+ struct sample_data *sample __used,
struct perf_session *session __used)
{
dump_printf(": unhandled!\n");
@@ -280,13 +293,13 @@ static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
if (handler->unthrottle == NULL)
handler->unthrottle = process_event_stub;
if (handler->attr == NULL)
- handler->attr = process_event_stub;
+ handler->attr = process_event_synth_stub;
if (handler->event_type == NULL)
- handler->event_type = process_event_stub;
+ handler->event_type = process_event_synth_stub;
if (handler->tracing_data == NULL)
- handler->tracing_data = process_event_stub;
+ handler->tracing_data = process_event_synth_stub;
if (handler->build_id == NULL)
- handler->build_id = process_event_stub;
+ handler->build_id = process_event_synth_stub;
if (handler->finished_round == NULL) {
if (handler->ordered_samples)
handler->finished_round = process_finished_round;
@@ -419,6 +432,7 @@ static void flush_sample_queue(struct perf_session *s,
struct ordered_samples *os = &s->ordered_samples;
struct list_head *head = &os->samples;
struct sample_queue *tmp, *iter;
+ struct sample_data sample;
u64 limit = os->next_flush;
u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL;
@@ -429,7 +443,8 @@ static void flush_sample_queue(struct perf_session *s,
if (iter->timestamp > limit)
break;
- ops->sample(iter->event, s);
+ event__parse_sample(iter->event, s->sample_type, &sample);
+ ops->sample(iter->event, &sample, s);
os->last_flush = iter->timestamp;
list_del(&iter->list);
@@ -578,20 +593,29 @@ static int queue_sample_event(event_t *event, struct sample_data *data,
return 0;
}
-static int perf_session__process_sample(event_t *event, struct perf_session *s,
+static int perf_session__process_sample(event_t *event,
+ struct sample_data *sample,
+ struct perf_session *s,
struct perf_event_ops *ops)
{
- struct sample_data data;
-
if (!ops->ordered_samples)
- return ops->sample(event, s);
+ return ops->sample(event, sample, s);
+
+ queue_sample_event(event, sample, s);
+ return 0;
+}
- bzero(&data, sizeof(struct sample_data));
- event__parse_sample(event, s->sample_type, &data);
+static void callchain__dump(struct sample_data *sample)
+{
+ unsigned int i;
- queue_sample_event(event, &data, s);
+ if (!dump_trace)
+ return;
- return 0;
+ printf("... chain: nr:%Lu\n", sample->callchain->nr);
+
+ for (i = 0; i < sample->callchain->nr; i++)
+ printf("..... %2d: %016Lx\n", i, sample->callchain->ips[i]);
}
static int perf_session__process_event(struct perf_session *self,
@@ -599,8 +623,16 @@ static int perf_session__process_event(struct perf_session *self,
struct perf_event_ops *ops,
u64 file_offset)
{
+ struct sample_data sample;
+
trace_event(event);
+ if (self->header.needs_swap && event__swap_ops[event->header.type])
+ event__swap_ops[event->header.type](event);
+
+ if (event->header.type == PERF_RECORD_SAMPLE)
+ event__parse_sample(event, self->sample_type, &sample);
+
if (event->header.type < PERF_RECORD_HEADER_MAX) {
dump_printf("%#Lx [%#x]: PERF_RECORD_%s",
file_offset, event->header.size,
@@ -608,28 +640,41 @@ static int perf_session__process_event(struct perf_session *self,
hists__inc_nr_events(&self->hists, event->header.type);
}
- if (self->header.needs_swap && event__swap_ops[event->header.type])
- event__swap_ops[event->header.type](event);
-
switch (event->header.type) {
case PERF_RECORD_SAMPLE:
- return perf_session__process_sample(event, self, ops);
+ dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
+ sample.pid, sample.tid, sample.ip, sample.period);
+
+ if (self->sample_type & PERF_SAMPLE_CALLCHAIN) {
+ if (!ip_callchain__valid(sample.callchain, event)) {
+ pr_debug("call-chain problem with event, "
+ "skipping it.\n");
+ ++self->hists.stats.nr_invalid_chains;
+ self->hists.stats.total_invalid_chains += sample.period;
+ return 0;
+ }
+
+ callchain__dump(&sample);
+ }
+
+ return perf_session__process_sample(event, &sample, self, ops);
+
case PERF_RECORD_MMAP:
- return ops->mmap(event, self);
+ return ops->mmap(event, &sample, self);
case PERF_RECORD_COMM:
- return ops->comm(event, self);
+ return ops->comm(event, &sample, self);
case PERF_RECORD_FORK:
- return ops->fork(event, self);
+ return ops->fork(event, &sample, self);
case PERF_RECORD_EXIT:
- return ops->exit(event, self);
+ return ops->exit(event, &sample, self);
case PERF_RECORD_LOST:
- return ops->lost(event, self);
+ return ops->lost(event, &sample, self);
case PERF_RECORD_READ:
- return ops->read(event, self);
+ return ops->read(event, &sample, self);
case PERF_RECORD_THROTTLE:
- return ops->throttle(event, self);
+ return ops->throttle(event, &sample, self);
case PERF_RECORD_UNTHROTTLE:
- return ops->unthrottle(event, self);
+ return ops->unthrottle(event, &sample, self);
case PERF_RECORD_HEADER_ATTR:
return ops->attr(event, self);
case PERF_RECORD_HEADER_EVENT_TYPE:
@@ -894,6 +939,14 @@ out_err:
session->hists.stats.nr_unknown_events);
}
+ if (session->hists.stats.nr_invalid_chains != 0) {
+ ui__warning("Found invalid callchains!\n\n"
+ "%u out of %u events were discarded for this reason.\n\n"
+ "Consider reporting to [email protected].\n\n",
+ session->hists.stats.nr_invalid_chains,
+ session->hists.stats.nr_events[PERF_RECORD_SAMPLE]);
+ }
+
perf_session_free_sample_buffers(session);
return err;
}
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 5bf6efa..4578f86 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -54,7 +54,9 @@ struct perf_session {
struct perf_event_ops;
-typedef int (*event_op)(event_t *self, struct perf_session *session);
+typedef int (*event_op)(event_t *self, struct sample_data *sample,
+ struct perf_session *session);
+typedef int (*event_synth_op)(event_t *self, struct perf_session *session);
typedef int (*event_op2)(event_t *self, struct perf_session *session,
struct perf_event_ops *ops);
@@ -67,8 +69,8 @@ struct perf_event_ops {
lost,
read,
throttle,
- unthrottle,
- attr,
+ unthrottle;
+ event_synth_op attr,
event_type,
tracing_data,
build_id;
@@ -104,6 +106,7 @@ int perf_session__create_kernel_maps(struct perf_session *self);
int do_read(int fd, void *buf, size_t size);
void perf_session__update_sample_type(struct perf_session *self);
+void perf_session__set_sample_type(struct perf_session *session, u64 type);
void perf_session__remove_thread(struct perf_session *self, struct thread *th);
static inline
--
1.6.2.5
From: Arnaldo Carvalho de Melo <[email protected]>
If perf_event_attr.sample_id_all is set it will add the PERF_SAMPLE_ identity
info:
TID, TIME, ID, CPU, STREAM_ID
As a trailer, so that older perf tools can process new files, just ignoring the
extra payload.
With this it's possible to do further analysis on problems in the event stream,
like detecting reordering of MMAP and FORK events, etc.
V2: Fixup header size in comm, mmap and task processing, as we have to take into
account different sample_types for each matching event, noticed by Thomas Gleixner.
Thomas also noticed a problem in v2 where, if we didn't have space in the buffer,
we wouldn't restore the header size.
Tested-by: Thomas Gleixner <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Acked-by: Ian Munsie <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
Acked-by: Thomas Gleixner <[email protected]>
Cc: Frédéric Weisbecker <[email protected]>
Cc: Ian Munsie <[email protected]>
Cc: Mike Galbraith <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Stephane Eranian <[email protected]>
Cc: Thomas Gleixner <[email protected]>
LKML-Reference: <new-submission>
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
include/linux/perf_event.h | 12 +++++-
kernel/perf_event.c | 108 +++++++++++++++++++++++++++++++++++++-------
2 files changed, 102 insertions(+), 18 deletions(-)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index b9950b1..2814ead 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -215,8 +215,9 @@ struct perf_event_attr {
*/
precise_ip : 2, /* skid constraint */
mmap_data : 1, /* non-exec mmap data */
+ sample_id_all : 1, /* sample_type all events */
- __reserved_1 : 46;
+ __reserved_1 : 45;
union {
__u32 wakeup_events; /* wakeup every n events */
@@ -327,6 +328,15 @@ struct perf_event_header {
enum perf_event_type {
/*
+ * If perf_event_attr.sample_id_all is set then all event types will
+ * have the sample_type selected fields related to where/when
+ * (identity) an event took place (TID, TIME, ID, CPU, STREAM_ID)
+ * described in PERF_RECORD_SAMPLE below, it will be stashed just after
+ * the perf_event_header and the fields already present for the existing
+ * fields, i.e. at the end of the payload. That way a newer perf.data
+ * file will be supported by older perf tools, with these new optional
+ * fields being ignored.
+ *
* The MMAP events record the PROT_EXEC mappings so that we can
* correlate userspace IPs to code. They have the following structure:
*
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index a047997..77ad22c 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -3388,9 +3388,9 @@ __always_inline void perf_output_copy(struct perf_output_handle *handle,
} while (len);
}
-static void perf_event_header__init_id(struct perf_event_header *header,
- struct perf_sample_data *data,
- struct perf_event *event)
+static void __perf_event_header__init_id(struct perf_event_header *header,
+ struct perf_sample_data *data,
+ struct perf_event *event)
{
u64 sample_type = event->attr.sample_type;
@@ -3418,6 +3418,43 @@ static void perf_event_header__init_id(struct perf_event_header *header,
}
}
+static void perf_event_header__init_id(struct perf_event_header *header,
+ struct perf_sample_data *data,
+ struct perf_event *event)
+{
+ if (event->attr.sample_id_all)
+ __perf_event_header__init_id(header, data, event);
+}
+
+static void __perf_event__output_id_sample(struct perf_output_handle *handle,
+ struct perf_sample_data *data)
+{
+ u64 sample_type = data->type;
+
+ if (sample_type & PERF_SAMPLE_TID)
+ perf_output_put(handle, data->tid_entry);
+
+ if (sample_type & PERF_SAMPLE_TIME)
+ perf_output_put(handle, data->time);
+
+ if (sample_type & PERF_SAMPLE_ID)
+ perf_output_put(handle, data->id);
+
+ if (sample_type & PERF_SAMPLE_STREAM_ID)
+ perf_output_put(handle, data->stream_id);
+
+ if (sample_type & PERF_SAMPLE_CPU)
+ perf_output_put(handle, data->cpu_entry);
+}
+
+static void perf_event__output_id_sample(struct perf_event *event,
+ struct perf_output_handle *handle,
+ struct perf_sample_data *sample)
+{
+ if (event->attr.sample_id_all)
+ __perf_event__output_id_sample(handle, sample);
+}
+
int perf_output_begin(struct perf_output_handle *handle,
struct perf_event *event, unsigned int size,
int nmi, int sample)
@@ -3425,6 +3462,7 @@ int perf_output_begin(struct perf_output_handle *handle,
struct perf_buffer *buffer;
unsigned long tail, offset, head;
int have_lost;
+ struct perf_sample_data sample_data;
struct {
struct perf_event_header header;
u64 id;
@@ -3451,8 +3489,12 @@ int perf_output_begin(struct perf_output_handle *handle,
goto out;
have_lost = local_read(&buffer->lost);
- if (have_lost)
- size += sizeof(lost_event);
+ if (have_lost) {
+ lost_event.header.size = sizeof(lost_event);
+ perf_event_header__init_id(&lost_event.header, &sample_data,
+ event);
+ size += lost_event.header.size;
+ }
perf_output_get_handle(handle);
@@ -3483,11 +3525,11 @@ int perf_output_begin(struct perf_output_handle *handle,
if (have_lost) {
lost_event.header.type = PERF_RECORD_LOST;
lost_event.header.misc = 0;
- lost_event.header.size = sizeof(lost_event);
lost_event.id = event->id;
lost_event.lost = local_xchg(&buffer->lost, 0);
perf_output_put(handle, lost_event);
+ perf_event__output_id_sample(event, handle, &sample_data);
}
return 0;
@@ -3700,7 +3742,7 @@ void perf_prepare_sample(struct perf_event_header *header,
header->misc = 0;
header->misc |= perf_misc_flags(regs);
- perf_event_header__init_id(header, data, event);
+ __perf_event_header__init_id(header, data, event);
if (sample_type & PERF_SAMPLE_IP)
data->ip = perf_instruction_pointer(regs);
@@ -3768,6 +3810,7 @@ perf_event_read_event(struct perf_event *event,
struct task_struct *task)
{
struct perf_output_handle handle;
+ struct perf_sample_data sample;
struct perf_read_event read_event = {
.header = {
.type = PERF_RECORD_READ,
@@ -3779,12 +3822,14 @@ perf_event_read_event(struct perf_event *event,
};
int ret;
+ perf_event_header__init_id(&read_event.header, &sample, event);
ret = perf_output_begin(&handle, event, read_event.header.size, 0, 0);
if (ret)
return;
perf_output_put(&handle, read_event);
perf_output_read(&handle, event);
+ perf_event__output_id_sample(event, &handle, &sample);
perf_output_end(&handle);
}
@@ -3814,14 +3859,16 @@ static void perf_event_task_output(struct perf_event *event,
struct perf_task_event *task_event)
{
struct perf_output_handle handle;
+ struct perf_sample_data sample;
struct task_struct *task = task_event->task;
- int size, ret;
+ int ret, size = task_event->event_id.header.size;
- size = task_event->event_id.header.size;
- ret = perf_output_begin(&handle, event, size, 0, 0);
+ perf_event_header__init_id(&task_event->event_id.header, &sample, event);
+ ret = perf_output_begin(&handle, event,
+ task_event->event_id.header.size, 0, 0);
if (ret)
- return;
+ goto out;
task_event->event_id.pid = perf_event_pid(event, task);
task_event->event_id.ppid = perf_event_pid(event, current);
@@ -3831,7 +3878,11 @@ static void perf_event_task_output(struct perf_event *event,
perf_output_put(&handle, task_event->event_id);
+ perf_event__output_id_sample(event, &handle, &sample);
+
perf_output_end(&handle);
+out:
+ task_event->event_id.header.size = size;
}
static int perf_event_task_match(struct perf_event *event)
@@ -3944,11 +3995,16 @@ static void perf_event_comm_output(struct perf_event *event,
struct perf_comm_event *comm_event)
{
struct perf_output_handle handle;
+ struct perf_sample_data sample;
int size = comm_event->event_id.header.size;
- int ret = perf_output_begin(&handle, event, size, 0, 0);
+ int ret;
+
+ perf_event_header__init_id(&comm_event->event_id.header, &sample, event);
+ ret = perf_output_begin(&handle, event,
+ comm_event->event_id.header.size, 0, 0);
if (ret)
- return;
+ goto out;
comm_event->event_id.pid = perf_event_pid(event, comm_event->task);
comm_event->event_id.tid = perf_event_tid(event, comm_event->task);
@@ -3956,7 +4012,12 @@ static void perf_event_comm_output(struct perf_event *event,
perf_output_put(&handle, comm_event->event_id);
perf_output_copy(&handle, comm_event->comm,
comm_event->comm_size);
+
+ perf_event__output_id_sample(event, &handle, &sample);
+
perf_output_end(&handle);
+out:
+ comm_event->event_id.header.size = size;
}
static int perf_event_comm_match(struct perf_event *event)
@@ -4001,7 +4062,6 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
comm_event->comm_size = size;
comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
-
rcu_read_lock();
list_for_each_entry_rcu(pmu, &pmus, entry) {
cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
@@ -4080,11 +4140,15 @@ static void perf_event_mmap_output(struct perf_event *event,
struct perf_mmap_event *mmap_event)
{
struct perf_output_handle handle;
+ struct perf_sample_data sample;
int size = mmap_event->event_id.header.size;
- int ret = perf_output_begin(&handle, event, size, 0, 0);
+ int ret;
+ perf_event_header__init_id(&mmap_event->event_id.header, &sample, event);
+ ret = perf_output_begin(&handle, event,
+ mmap_event->event_id.header.size, 0, 0);
if (ret)
- return;
+ goto out;
mmap_event->event_id.pid = perf_event_pid(event, current);
mmap_event->event_id.tid = perf_event_tid(event, current);
@@ -4092,7 +4156,12 @@ static void perf_event_mmap_output(struct perf_event *event,
perf_output_put(&handle, mmap_event->event_id);
perf_output_copy(&handle, mmap_event->file_name,
mmap_event->file_size);
+
+ perf_event__output_id_sample(event, &handle, &sample);
+
perf_output_end(&handle);
+out:
+ mmap_event->event_id.header.size = size;
}
static int perf_event_mmap_match(struct perf_event *event,
@@ -4245,6 +4314,7 @@ void perf_event_mmap(struct vm_area_struct *vma)
static void perf_log_throttle(struct perf_event *event, int enable)
{
struct perf_output_handle handle;
+ struct perf_sample_data sample;
int ret;
struct {
@@ -4266,11 +4336,15 @@ static void perf_log_throttle(struct perf_event *event, int enable)
if (enable)
throttle_event.header.type = PERF_RECORD_UNTHROTTLE;
- ret = perf_output_begin(&handle, event, sizeof(throttle_event), 1, 0);
+ perf_event_header__init_id(&throttle_event.header, &sample, event);
+
+ ret = perf_output_begin(&handle, event,
+ throttle_event.header.size, 1, 0);
if (ret)
return;
perf_output_put(&handle, throttle_event);
+ perf_event__output_id_sample(event, &handle, &sample);
perf_output_end(&handle);
}
--
1.6.2.5
From: Thomas Gleixner <[email protected]>
The precalculated header size is not updated when an event is inherited. That
results in bogus sample entries for all child events. Bug introduced in c320c7b.
Cc: Frederic Weisbecker <[email protected]>
Cc: Ian Munsie <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Mike Galbraith <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Stephane Eranian <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
kernel/perf_event.c | 5 +++++
1 files changed, 5 insertions(+), 0 deletions(-)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index aede712..7961b27 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -6099,6 +6099,11 @@ inherit_event(struct perf_event *parent_event,
child_event->overflow_handler = parent_event->overflow_handler;
/*
+ * Precalculate sample_data sizes
+ */
+ perf_event__header_size(child_event);
+
+ /*
* Link it up in the child's context:
*/
raw_spin_lock_irqsave(&child_ctx->lock, flags);
--
1.6.2.5
Now that we have timestamps on FORK, EXIT, COMM, MMAP events we can
sort everything in time order. This fixes the following observed
problem:
mmap(file1) -> pagefault() -> munmap(file1)
mmap(file2) -> pagefault() -> munmap(file2)
Resulted in decoding both pagefaults in file2 because the file1 map
was already replaced by the file2 map when the map address was
identical.
With all events sorted we decode both pagefaults correctly.
Signed-off-by: Thomas Gleixner <[email protected]>
---
tools/perf/util/session.c | 125 ++++++++++++++++++++++++++--------------------
1 file changed, 72 insertions(+), 53 deletions(-)
Index: linux-2.6-tip/tools/perf/util/session.c
===================================================================
--- linux-2.6-tip.orig/tools/perf/util/session.c
+++ linux-2.6-tip/tools/perf/util/session.c
@@ -475,6 +475,11 @@ static void perf_session_free_sample_buf
}
}
+static int perf_session_deliver_event(struct perf_session *session,
+ event_t *event,
+ struct sample_data *sample,
+ struct perf_event_ops *ops);
+
static void flush_sample_queue(struct perf_session *s,
struct perf_event_ops *ops)
{
@@ -493,7 +498,7 @@ static void flush_sample_queue(struct pe
break;
event__parse_sample(iter->event, s, &sample);
- ops->sample(iter->event, &sample, s);
+ perf_session_deliver_event(s, iter->event, &sample, ops);
os->last_flush = iter->timestamp;
list_del(&iter->list);
@@ -558,8 +563,7 @@ static int process_finished_round(event_
}
/* The queue is ordered by time */
-static void __queue_sample_event(struct sample_queue *new,
- struct perf_session *s)
+static void __queue_event(struct sample_queue *new, struct perf_session *s)
{
struct ordered_samples *os = &s->ordered_samples;
struct sample_queue *sample = os->last_sample;
@@ -605,14 +609,17 @@ static void __queue_sample_event(struct
#define MAX_SAMPLE_BUFFER (64 * 1024 / sizeof(struct sample_queue))
-static int queue_sample_event(event_t *event, struct sample_data *data,
- struct perf_session *s)
+static int perf_session_queue_event(struct perf_session *s, event_t *event,
+ struct sample_data *data)
{
struct ordered_samples *os = &s->ordered_samples;
struct list_head *sc = &os->sample_cache;
u64 timestamp = data->time;
struct sample_queue *new;
+ if (!timestamp)
+ return -ETIME;
+
if (timestamp < s->ordered_samples.last_flush) {
printf("Warning: Timestamp below last timeslice flush\n");
return -EINVAL;
@@ -637,20 +644,8 @@ static int queue_sample_event(event_t *e
new->timestamp = timestamp;
new->event = event;
- __queue_sample_event(new, s);
-
- return 0;
-}
-
-static int perf_session__process_sample(event_t *event,
- struct sample_data *sample,
- struct perf_session *s,
- struct perf_event_ops *ops)
-{
- if (!ops->ordered_samples)
- return ops->sample(event, sample, s);
+ __queue_event(new, s);
- queue_sample_event(event, sample, s);
return 0;
}
@@ -684,83 +679,107 @@ static void perf_session__print_tstamp(s
printf("%Lu ", sample->time);
}
-static int perf_session__process_event(struct perf_session *self,
+static int perf_session_deliver_event(struct perf_session *session,
+ event_t *event,
+ struct sample_data *sample,
+ struct perf_event_ops *ops)
+{
+ switch (event->header.type) {
+ case PERF_RECORD_SAMPLE:
+ return ops->sample(event, sample, session);
+ case PERF_RECORD_MMAP:
+ return ops->mmap(event, sample, session);
+ case PERF_RECORD_COMM:
+ return ops->comm(event, sample, session);
+ case PERF_RECORD_FORK:
+ return ops->fork(event, sample, session);
+ case PERF_RECORD_EXIT:
+ return ops->exit(event, sample, session);
+ case PERF_RECORD_LOST:
+ return ops->lost(event, sample, session);
+ case PERF_RECORD_READ:
+ return ops->read(event, sample, session);
+ case PERF_RECORD_THROTTLE:
+ return ops->throttle(event, sample, session);
+ case PERF_RECORD_UNTHROTTLE:
+ return ops->unthrottle(event, sample, session);
+ default:
+ ++session->hists.stats.nr_unknown_events;
+ return -1;
+ }
+}
+
+static int perf_session__process_event(struct perf_session *session,
event_t *event,
struct perf_event_ops *ops,
u64 file_offset)
{
struct sample_data sample;
+ int ret;
trace_event(event);
- if (self->header.needs_swap && event__swap_ops[event->header.type])
+ if (session->header.needs_swap && event__swap_ops[event->header.type])
event__swap_ops[event->header.type](event);
if (event->header.type >= PERF_RECORD_MMAP &&
event->header.type <= PERF_RECORD_SAMPLE) {
- event__parse_sample(event, self, &sample);
+ event__parse_sample(event, session, &sample);
if (dump_trace)
- perf_session__print_tstamp(self, event, &sample);
+ perf_session__print_tstamp(session, event, &sample);
}
if (event->header.type < PERF_RECORD_HEADER_MAX) {
dump_printf("%#Lx [%#x]: PERF_RECORD_%s",
file_offset, event->header.size,
event__name[event->header.type]);
- hists__inc_nr_events(&self->hists, event->header.type);
+ hists__inc_nr_events(&session->hists, event->header.type);
}
+ /* These events are processed right away */
switch (event->header.type) {
case PERF_RECORD_SAMPLE:
- dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
+ dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n",
+ event->header.misc,
sample.pid, sample.tid, sample.ip, sample.period);
- if (self->sample_type & PERF_SAMPLE_CALLCHAIN) {
+ if (session->sample_type & PERF_SAMPLE_CALLCHAIN) {
if (!ip_callchain__valid(sample.callchain, event)) {
pr_debug("call-chain problem with event, "
"skipping it.\n");
- ++self->hists.stats.nr_invalid_chains;
- self->hists.stats.total_invalid_chains += sample.period;
+ ++session->hists.stats.nr_invalid_chains;
+ session->hists.stats.total_invalid_chains +=
+ sample.period;
return 0;
}
callchain__dump(&sample);
}
+ break;
- return perf_session__process_sample(event, &sample, self, ops);
-
- case PERF_RECORD_MMAP:
- return ops->mmap(event, &sample, self);
- case PERF_RECORD_COMM:
- return ops->comm(event, &sample, self);
- case PERF_RECORD_FORK:
- return ops->fork(event, &sample, self);
- case PERF_RECORD_EXIT:
- return ops->exit(event, &sample, self);
- case PERF_RECORD_LOST:
- return ops->lost(event, &sample, self);
- case PERF_RECORD_READ:
- return ops->read(event, &sample, self);
- case PERF_RECORD_THROTTLE:
- return ops->throttle(event, &sample, self);
- case PERF_RECORD_UNTHROTTLE:
- return ops->unthrottle(event, &sample, self);
case PERF_RECORD_HEADER_ATTR:
- return ops->attr(event, self);
+ return ops->attr(event, session);
case PERF_RECORD_HEADER_EVENT_TYPE:
- return ops->event_type(event, self);
+ return ops->event_type(event, session);
case PERF_RECORD_HEADER_TRACING_DATA:
/* setup for reading amidst mmap */
- lseek(self->fd, file_offset, SEEK_SET);
- return ops->tracing_data(event, self);
+ lseek(session->fd, file_offset, SEEK_SET);
+ return ops->tracing_data(event, session);
case PERF_RECORD_HEADER_BUILD_ID:
- return ops->build_id(event, self);
+ return ops->build_id(event, session);
case PERF_RECORD_FINISHED_ROUND:
- return ops->finished_round(event, self, ops);
+ return ops->finished_round(event, session, ops);
default:
- ++self->hists.stats.nr_unknown_events;
- return -1;
+ break;
}
+
+ if (ops->ordered_samples) {
+ ret = perf_session_queue_event(session, event, &sample);
+ if (ret != -ETIME)
+ return ret;
+ }
+
+ return perf_session_deliver_event(session, event, &sample, ops);
}
void perf_event_header__bswap(struct perf_event_header *self)
Hi all,
Here are my set of patches to process events in order and solve the bug where
samples would not be attributed correctly if the COMM and MMAP events for them
had not yet been processed.
I ran out of time cleaning these on Friday and hadn't touched them over the
weekend. I see Thomas has now already posted a patch that does most of what the
third patch in this series does, and this series would supersede his patch, my
apologies for not getting these out last week.
In addition to what Thomas' patch does, patch 3 in this series:
- Enables the PERF_SAMPLE_TIME flag automatically when recording if recording
from multiple CPUs (ie, if no_inherit is not set). This flag will be
disabled if the kernel does not support timestamps on every event and was
not manually requested.
- Enables ordered_samples in perf report by default.
- If an event is encountered without a timestamp, fall back to processing in
file order (otherwise we might process a PERF_RECORD_EXIT event too early).
Thomas' patch does change some names that I did not ('self' -> 'session'), but
those should be pretty straight forward to put in their own patch on top of
these.
The first patch in this series changes how unresolved symbols are printed out
and the second patch moves some debugging output into the trace_event function
to keep the debugging output sensible when processing events by timestamp.
Cheers,
-Ian
From: Ian Munsie <[email protected]>
In the event that a DSO has not been identified, just print out
[unknown] instead of the instruction pointer as we previously were
doing, which is pretty meaningless for a shared object (at least to the
user's perspective).
The IP we print out is fairly meaningless in general anyway - it's just
one (the first) of the many addresses that were lumped together as
unidentified, and could span many shared objects and symbols. In reality
if we see this [unknown] output then the report -D output is going to be
more useful anyway as we can see all the different address that it
represents.
If we are printing the symbols we are still going to see this IP in that
column anyway since they shouldn't resolve either.
This patch also changes the symbol address printouts so that they print
out 0x before the address, are left aligned, and changes the %L format
string (which relies on a glibc bug) to %ll.
Before:
74.11% :3259 4a6c [k] 4a6c
After:
74.11% :3259 [unknown] [k] 0x4a6c
Signed-off-by: Ian Munsie <[email protected]>
---
tools/perf/util/sort.c | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index b62a553..f44fa54 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -170,7 +170,7 @@ static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf,
return repsep_snprintf(bf, size, "%-*s", width, dso_name);
}
- return repsep_snprintf(bf, size, "%*Lx", width, self->ip);
+ return repsep_snprintf(bf, size, "%-*s", width, "[unknown]");
}
/* --sort symbol */
@@ -196,7 +196,7 @@ static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
if (verbose) {
char o = self->ms.map ? dso__symtab_origin(self->ms.map->dso) : '!';
- ret += repsep_snprintf(bf, size, "%*Lx %c ",
+ ret += repsep_snprintf(bf, size, "%-#*llx %c ",
BITS_PER_LONG / 4, self->ip, o);
}
@@ -205,7 +205,7 @@ static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
ret += repsep_snprintf(bf + ret, size - ret, "%s",
self->ms.sym->name);
else
- ret += repsep_snprintf(bf + ret, size - ret, "%*Lx",
+ ret += repsep_snprintf(bf + ret, size - ret, "%-#*llx",
BITS_PER_LONG / 4, self->ip);
return ret;
--
1.7.2.3
From: Ian Munsie <[email protected]>
This patch moves all the places that would produce output while doing a
perf report -D into the trace_event routine. This will be necessary to
allow perf report to process events ordered by timestamp and still
produce sane debugging output.
The offset of ordered events in the file are stored in the sample queue
to allow them to be printed in the debugging output as before.
Signed-off-by: Ian Munsie <[email protected]>
---
tools/perf/util/debug.c | 53 +++++++++++++++++++++++++-
tools/perf/util/debug.h | 4 +-
tools/perf/util/session.c | 92 +++++++++++++-------------------------------
3 files changed, 82 insertions(+), 67 deletions(-)
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index 01bbe8e..d47dff4 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -11,6 +11,7 @@
#include "event.h"
#include "debug.h"
#include "util.h"
+#include "session.h"
int verbose;
bool dump_trace = false, quiet = false;
@@ -57,7 +58,36 @@ void ui__warning(const char *format, ...)
}
#endif
-void trace_event(event_t *event)
+static void callchain__dump(struct sample_data *sample)
+{
+ unsigned int i;
+
+ printf("... chain: nr:%llu\n", sample->callchain->nr);
+
+ for (i = 0; i < sample->callchain->nr; i++)
+ printf("..... %2d: %016llx\n", i, sample->callchain->ips[i]);
+}
+
+static void perf_session__print_tstamp(struct perf_session *session,
+ event_t *event,
+ struct sample_data *sample)
+{
+ if (event->header.type != PERF_RECORD_SAMPLE &&
+ !session->sample_id_all) {
+ fputs("-1 -1 ", stdout);
+ return;
+ }
+
+ if ((session->sample_type & PERF_SAMPLE_CPU))
+ printf("%u ", sample->cpu);
+
+ if (session->sample_type & PERF_SAMPLE_TIME)
+ printf("%llu ", sample->time);
+}
+
+void trace_event(event_t *event, u64 file_offset,
+ struct perf_session *session,
+ struct sample_data *sample)
{
unsigned char *raw_event = (void *)event;
const char *color = PERF_COLOR_BLUE;
@@ -66,6 +96,9 @@ void trace_event(event_t *event)
if (!dump_trace)
return;
+ printf("\n%#llx [%#x]: event: %d\n",
+ file_offset, event->header.size, event->header.type);
+
printf(".");
color_fprintf(stdout, color, "\n. ... raw event: size %d bytes\n",
event->header.size);
@@ -91,4 +124,22 @@ void trace_event(event_t *event)
}
}
printf(".\n");
+
+ if (event->header.type >= PERF_RECORD_MMAP &&
+ event->header.type <= PERF_RECORD_SAMPLE)
+ perf_session__print_tstamp(session, event, sample);
+
+ if (event->header.type < PERF_RECORD_HEADER_MAX) {
+ printf("%#llx [%#x]: PERF_RECORD_%s",
+ file_offset, event->header.size,
+ event__name[event->header.type]);
+ }
+
+ if (event->header.type == PERF_RECORD_SAMPLE) {
+ printf("(IP, %d): %d/%d: %#llx period: %lld\n", event->header.misc,
+ sample->pid, sample->tid, sample->ip, sample->period);
+
+ if (session->sample_type & PERF_SAMPLE_CALLCHAIN)
+ callchain__dump(sample);
+ }
}
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index ca35fd6..80f0285 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -9,7 +9,9 @@ extern int verbose;
extern bool quiet, dump_trace;
int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
-void trace_event(event_t *event);
+void trace_event(event_t *event, u64 file_offset,
+ struct perf_session *session,
+ struct sample_data *sample);
struct ui_progress;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 5c75660..596829f 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -444,6 +444,7 @@ static event__swap_op event__swap_ops[] = {
struct sample_queue {
u64 timestamp;
+ u64 file_offset;
event_t *event;
struct list_head list;
};
@@ -479,6 +480,9 @@ static void flush_sample_queue(struct perf_session *s,
break;
event__parse_sample(iter->event, s, &sample);
+
+ trace_event((event_t *)iter->event, iter->file_offset, s, &sample);
+
ops->sample(iter->event, &sample, s);
os->last_flush = iter->timestamp;
@@ -592,7 +596,7 @@ static void __queue_sample_event(struct sample_queue *new,
#define MAX_SAMPLE_BUFFER (64 * 1024 / sizeof(struct sample_queue))
static int queue_sample_event(event_t *event, struct sample_data *data,
- struct perf_session *s)
+ struct perf_session *s, u64 file_offset)
{
struct ordered_samples *os = &s->ordered_samples;
struct list_head *sc = &os->sample_cache;
@@ -621,6 +625,7 @@ static int queue_sample_event(event_t *event, struct sample_data *data,
}
new->timestamp = timestamp;
+ new->file_offset = file_offset;
new->event = event;
__queue_sample_event(new, s);
@@ -631,43 +636,16 @@ static int queue_sample_event(event_t *event, struct sample_data *data,
static int perf_session__process_sample(event_t *event,
struct sample_data *sample,
struct perf_session *s,
- struct perf_event_ops *ops)
+ struct perf_event_ops *ops,
+ u64 file_offset)
{
- if (!ops->ordered_samples)
+ if (!ops->ordered_samples) {
+ trace_event(event, file_offset, s, sample);
return ops->sample(event, sample, s);
-
- queue_sample_event(event, sample, s);
- return 0;
-}
-
-static void callchain__dump(struct sample_data *sample)
-{
- unsigned int i;
-
- if (!dump_trace)
- return;
-
- printf("... chain: nr:%Lu\n", sample->callchain->nr);
-
- for (i = 0; i < sample->callchain->nr; i++)
- printf("..... %2d: %016Lx\n", i, sample->callchain->ips[i]);
-}
-
-static void perf_session__print_tstamp(struct perf_session *session,
- event_t *event,
- struct sample_data *sample)
-{
- if (event->header.type != PERF_RECORD_SAMPLE &&
- !session->sample_id_all) {
- fputs("-1 -1 ", stdout);
- return;
}
- if ((session->sample_type & PERF_SAMPLE_CPU))
- printf("%u ", sample->cpu);
-
- if (session->sample_type & PERF_SAMPLE_TIME)
- printf("%Lu ", sample->time);
+ queue_sample_event(event, sample, s, file_offset);
+ return 0;
}
static int perf_session__process_event(struct perf_session *self,
@@ -677,44 +655,34 @@ static int perf_session__process_event(struct perf_session *self,
{
struct sample_data sample;
- trace_event(event);
-
if (self->header.needs_swap && event__swap_ops[event->header.type])
event__swap_ops[event->header.type](event);
if (event->header.type >= PERF_RECORD_MMAP &&
- event->header.type <= PERF_RECORD_SAMPLE) {
+ event->header.type <= PERF_RECORD_SAMPLE)
event__parse_sample(event, self, &sample);
- if (dump_trace)
- perf_session__print_tstamp(self, event, &sample);
- }
- if (event->header.type < PERF_RECORD_HEADER_MAX) {
- dump_printf("%#Lx [%#x]: PERF_RECORD_%s",
- file_offset, event->header.size,
- event__name[event->header.type]);
+ if (event->header.type < PERF_RECORD_HEADER_MAX)
hists__inc_nr_events(&self->hists, event->header.type);
- }
switch (event->header.type) {
case PERF_RECORD_SAMPLE:
- dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
- sample.pid, sample.tid, sample.ip, sample.period);
-
- if (self->sample_type & PERF_SAMPLE_CALLCHAIN) {
- if (!ip_callchain__valid(sample.callchain, event)) {
- pr_debug("call-chain problem with event, "
- "skipping it.\n");
- ++self->hists.stats.nr_invalid_chains;
- self->hists.stats.total_invalid_chains += sample.period;
- return 0;
- }
-
- callchain__dump(&sample);
+ if (self->sample_type & PERF_SAMPLE_CALLCHAIN &&
+ !ip_callchain__valid(sample.callchain, event)) {
+ pr_debug("call-chain problem with event, "
+ "skipping it.\n");
+ ++self->hists.stats.nr_invalid_chains;
+ self->hists.stats.total_invalid_chains += sample.period;
+ return 0;
}
- return perf_session__process_sample(event, &sample, self, ops);
+ return perf_session__process_sample(event, &sample, self, ops, file_offset);
+ default: break;
+ }
+
+ trace_event(event, file_offset, self, &sample);
+ switch (event->header.type) {
case PERF_RECORD_MMAP:
return ops->mmap(event, &sample, self);
case PERF_RECORD_COMM:
@@ -851,9 +819,6 @@ more:
head += size;
- dump_printf("\n%#Lx [%#x]: event: %d\n",
- head, event.header.size, event.header.type);
-
if (skip > 0)
head += skip;
@@ -942,9 +907,6 @@ more:
size = event->header.size;
- dump_printf("\n%#Lx [%#x]: event: %d\n",
- file_pos, event->header.size, event->header.type);
-
if (size == 0 ||
perf_session__process_event(session, event, ops, file_pos) < 0) {
dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
--
1.7.2.3
From: Ian Munsie <[email protected]>
This patch changes perf report to ask for the ID info on all events by
default if recording from multiple CPUs.
Perf report will now process the events in order if the kernel is able
to provide timestamps on all events. This ensures that events such as
COMM and MMAP which are necessary to correctly interpret samples are
processed prior to those samples so that they are attributed correctly.
Before:
# perf record ./cachetest
# perf report
# Events: 6K cycles
#
# Overhead Command Shared Object Symbol
# ........ ....... ................. ...............................
#
74.11% :3259 [unknown] [k] 0x4a6c
1.50% cachetest ld-2.11.2.so [.] 0x1777c
1.46% :3259 [kernel.kallsyms] [k] .perf_event_mmap_ctx
1.25% :3259 [kernel.kallsyms] [k] restore
0.74% :3259 [kernel.kallsyms] [k] ._raw_spin_lock
0.71% :3259 [kernel.kallsyms] [k] .filemap_fault
0.66% :3259 [kernel.kallsyms] [k] .memset
0.54% cachetest [kernel.kallsyms] [k] .sha_transform
0.54% :3259 [kernel.kallsyms] [k] .copy_4K_page
0.54% :3259 [kernel.kallsyms] [k] .find_get_page
0.52% :3259 [kernel.kallsyms] [k] .trace_hardirqs_off
0.50% :3259 [kernel.kallsyms] [k] .__do_fault
<SNIP>
After:
# perf report
# Events: 6K cycles
#
# Overhead Command Shared Object Symbol
# ........ ....... ................. ...............................
#
44.28% cachetest cachetest [.] sumArrayNaive
22.53% cachetest cachetest [.] sumArrayOptimal
6.59% cachetest ld-2.11.2.so [.] 0x1777c
2.13% cachetest [unknown] [k] 0x340
1.46% cachetest [kernel.kallsyms] [k] .perf_event_mmap_ctx
1.25% cachetest [kernel.kallsyms] [k] restore
0.74% cachetest [kernel.kallsyms] [k] ._raw_spin_lock
0.71% cachetest [kernel.kallsyms] [k] .filemap_fault
0.66% cachetest [kernel.kallsyms] [k] .memset
0.54% cachetest [kernel.kallsyms] [k] .copy_4K_page
0.54% cachetest [kernel.kallsyms] [k] .find_get_page
0.54% cachetest [kernel.kallsyms] [k] .sha_transform
0.52% cachetest [kernel.kallsyms] [k] .trace_hardirqs_off
0.50% cachetest [kernel.kallsyms] [k] .__do_fault
<SNIP>
Signed-off-by: Ian Munsie <[email protected]>
---
tools/perf/builtin-record.c | 4 +-
tools/perf/builtin-report.c | 1 +
tools/perf/util/session.c | 132 ++++++++++++++++++++++++-------------------
3 files changed, 77 insertions(+), 60 deletions(-)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 699dd21..f208c2d 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -285,7 +285,7 @@ static void create_counter(int counter, int cpu)
if (system_wide)
attr->sample_type |= PERF_SAMPLE_CPU;
- if (sample_time)
+ if (sample_time || system_wide || !no_inherit || cpu_list)
attr->sample_type |= PERF_SAMPLE_TIME;
if (raw_samples) {
@@ -327,6 +327,8 @@ try_again:
* Old kernel, no attr->sample_id_type_all field
*/
sample_id_all_avail = false;
+ if (!sample_time && !raw_samples)
+ attr->sample_type &= ~PERF_SAMPLE_TIME;
goto retry_sample_id;
}
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 904519f..84d25fa 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -244,6 +244,7 @@ static struct perf_event_ops event_ops = {
.event_type = event__process_event_type,
.tracing_data = event__process_tracing_data,
.build_id = event__process_build_id,
+ .ordered_samples = true,
};
extern volatile int session_done;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 596829f..f33d3b3 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -462,6 +462,45 @@ static void perf_session_free_sample_buffers(struct perf_session *session)
}
}
+static int perf_session__dispatch_event(event_t *event,
+ struct sample_data *sample,
+ struct perf_session *s,
+ struct perf_event_ops *ops)
+{
+ switch (event->header.type) {
+ case PERF_RECORD_SAMPLE:
+ return ops->sample(event, sample, s);
+ case PERF_RECORD_MMAP:
+ return ops->mmap(event, sample, s);
+ case PERF_RECORD_COMM:
+ return ops->comm(event, sample, s);
+ case PERF_RECORD_FORK:
+ return ops->fork(event, sample, s);
+ case PERF_RECORD_EXIT:
+ return ops->exit(event, sample, s);
+ case PERF_RECORD_LOST:
+ return ops->lost(event, sample, s);
+ case PERF_RECORD_READ:
+ return ops->read(event, sample, s);
+ case PERF_RECORD_THROTTLE:
+ return ops->throttle(event, sample, s);
+ case PERF_RECORD_UNTHROTTLE:
+ return ops->unthrottle(event, sample, s);
+ case PERF_RECORD_HEADER_ATTR:
+ return ops->attr(event, s);
+ case PERF_RECORD_HEADER_EVENT_TYPE:
+ return ops->event_type(event, s);
+ case PERF_RECORD_HEADER_TRACING_DATA:
+ return ops->tracing_data(event, s);
+ case PERF_RECORD_HEADER_BUILD_ID:
+ return ops->build_id(event, s);
+ case PERF_RECORD_FINISHED_ROUND:
+ return ops->finished_round(event, s, ops);
+ default:
+ return -1;
+ }
+}
+
static void flush_sample_queue(struct perf_session *s,
struct perf_event_ops *ops)
{
@@ -482,8 +521,7 @@ static void flush_sample_queue(struct perf_session *s,
event__parse_sample(iter->event, s, &sample);
trace_event((event_t *)iter->event, iter->file_offset, s, &sample);
-
- ops->sample(iter->event, &sample, s);
+ perf_session__dispatch_event(iter->event, &sample, s, ops);
os->last_flush = iter->timestamp;
list_del(&iter->list);
@@ -548,8 +586,7 @@ static int process_finished_round(event_t *event __used,
}
/* The queue is ordered by time */
-static void __queue_sample_event(struct sample_queue *new,
- struct perf_session *s)
+static void __queue_event(struct sample_queue *new, struct perf_session *s)
{
struct ordered_samples *os = &s->ordered_samples;
struct sample_queue *sample = os->last_sample;
@@ -595,12 +632,11 @@ static void __queue_sample_event(struct sample_queue *new,
#define MAX_SAMPLE_BUFFER (64 * 1024 / sizeof(struct sample_queue))
-static int queue_sample_event(event_t *event, struct sample_data *data,
- struct perf_session *s, u64 file_offset)
+static int queue_event(event_t *event, u64 timestamp,
+ struct perf_session *s, u64 file_offset)
{
struct ordered_samples *os = &s->ordered_samples;
struct list_head *sc = &os->sample_cache;
- u64 timestamp = data->time;
struct sample_queue *new;
if (timestamp < s->ordered_samples.last_flush) {
@@ -628,23 +664,28 @@ static int queue_sample_event(event_t *event, struct sample_data *data,
new->file_offset = file_offset;
new->event = event;
- __queue_sample_event(new, s);
+ __queue_event(new, s);
return 0;
}
-static int perf_session__process_sample(event_t *event,
- struct sample_data *sample,
- struct perf_session *s,
- struct perf_event_ops *ops,
- u64 file_offset)
+static int perf_session__process_timed(event_t *event,
+ struct sample_data *sample,
+ struct perf_session *s,
+ struct perf_event_ops *ops,
+ u64 file_offset)
{
+ if (ops->ordered_samples && sample->time == -1ULL) {
+ dump_printf("Event missing timestamp, switching to unordered processing\n");
+ flush_sample_queue(s, ops);
+ ops->ordered_samples = false;
+ }
if (!ops->ordered_samples) {
trace_event(event, file_offset, s, sample);
- return ops->sample(event, sample, s);
+ return perf_session__dispatch_event(event, sample, s, ops);
}
- queue_sample_event(event, sample, s, file_offset);
+ queue_event(event, sample->time, s, file_offset);
return 0;
}
@@ -664,57 +705,30 @@ static int perf_session__process_event(struct perf_session *self,
if (event->header.type < PERF_RECORD_HEADER_MAX)
hists__inc_nr_events(&self->hists, event->header.type);
+ else
+ ++self->hists.stats.nr_unknown_events;
- switch (event->header.type) {
- case PERF_RECORD_SAMPLE:
- if (self->sample_type & PERF_SAMPLE_CALLCHAIN &&
- !ip_callchain__valid(sample.callchain, event)) {
- pr_debug("call-chain problem with event, "
- "skipping it.\n");
- ++self->hists.stats.nr_invalid_chains;
- self->hists.stats.total_invalid_chains += sample.period;
- return 0;
- }
-
- return perf_session__process_sample(event, &sample, self, ops, file_offset);
- default: break;
+ if (event->header.type == PERF_RECORD_SAMPLE &&
+ self->sample_type & PERF_SAMPLE_CALLCHAIN &&
+ !ip_callchain__valid(sample.callchain, event)) {
+ pr_debug("call-chain problem with event, skipping it.\n");
+ ++self->hists.stats.nr_invalid_chains;
+ self->hists.stats.total_invalid_chains += sample.period;
+ return 0;
}
+ if (event->header.type >= PERF_RECORD_MMAP &&
+ event->header.type <= PERF_RECORD_SAMPLE)
+ return perf_session__process_timed(event, &sample, self, ops,
+ file_offset);
+
trace_event(event, file_offset, self, &sample);
- switch (event->header.type) {
- case PERF_RECORD_MMAP:
- return ops->mmap(event, &sample, self);
- case PERF_RECORD_COMM:
- return ops->comm(event, &sample, self);
- case PERF_RECORD_FORK:
- return ops->fork(event, &sample, self);
- case PERF_RECORD_EXIT:
- return ops->exit(event, &sample, self);
- case PERF_RECORD_LOST:
- return ops->lost(event, &sample, self);
- case PERF_RECORD_READ:
- return ops->read(event, &sample, self);
- case PERF_RECORD_THROTTLE:
- return ops->throttle(event, &sample, self);
- case PERF_RECORD_UNTHROTTLE:
- return ops->unthrottle(event, &sample, self);
- case PERF_RECORD_HEADER_ATTR:
- return ops->attr(event, self);
- case PERF_RECORD_HEADER_EVENT_TYPE:
- return ops->event_type(event, self);
- case PERF_RECORD_HEADER_TRACING_DATA:
+ if (event->header.type == PERF_RECORD_HEADER_TRACING_DATA)
/* setup for reading amidst mmap */
lseek(self->fd, file_offset, SEEK_SET);
- return ops->tracing_data(event, self);
- case PERF_RECORD_HEADER_BUILD_ID:
- return ops->build_id(event, self);
- case PERF_RECORD_FINISHED_ROUND:
- return ops->finished_round(event, self, ops);
- default:
- ++self->hists.stats.nr_unknown_events;
- return -1;
- }
+
+ return perf_session__dispatch_event(event, &sample, self, ops);
}
void perf_event_header__bswap(struct perf_event_header *self)
--
1.7.2.3
On Mon, 6 Dec 2010, Ian Munsie wrote:
> From: Ian Munsie <[email protected]>
>
> This patch changes perf report to ask for the ID info on all events be
> default if recording from multiple CPUs.
>
> Perf report will now process the events in order if the kernel is able
> to provide timestamps on all events. This ensures that events such as
> COMM and MMAP which are necessary to correctly interpret samples are
> processed prior to those samples so that they are attributed correctly.
See https://lkml.org/lkml/2010/12/5/45
Slightly different, but the same idea :)
>
> +static int perf_session__dispatch_event(event_t *event,
> + struct sample_data *sample,
> + struct perf_session *s,
> + struct perf_event_ops *ops)
> +{
> + switch (event->header.type) {
> + case PERF_RECORD_SAMPLE:
> + return ops->sample(event, sample, s);
> + case PERF_RECORD_MMAP:
> + return ops->mmap(event, sample, s);
> + case PERF_RECORD_COMM:
> + return ops->comm(event, sample, s);
> + case PERF_RECORD_FORK:
> + return ops->fork(event, sample, s);
> + case PERF_RECORD_EXIT:
> + return ops->exit(event, sample, s);
> + case PERF_RECORD_LOST:
> + return ops->lost(event, sample, s);
> + case PERF_RECORD_READ:
> + return ops->read(event, sample, s);
> + case PERF_RECORD_THROTTLE:
> + return ops->throttle(event, sample, s);
> + case PERF_RECORD_UNTHROTTLE:
> + return ops->unthrottle(event, sample, s);
> + case PERF_RECORD_HEADER_ATTR:
> + return ops->attr(event, s);
> + case PERF_RECORD_HEADER_EVENT_TYPE:
> + return ops->event_type(event, s);
> + case PERF_RECORD_HEADER_TRACING_DATA:
> + return ops->tracing_data(event, s);
> + case PERF_RECORD_HEADER_BUILD_ID:
> + return ops->build_id(event, s);
These can be processed unordered.
> + case PERF_RECORD_FINISHED_ROUND:
> + return ops->finished_round(event, s, ops);
You don't want to delay this unless you have unlimited memory. See my
patch.
> {
> + if (ops->ordered_samples && sample->time == -1ULL) {
> + dump_printf("Event missing timestamp, switching to unordered processing\n");
> + flush_sample_queue(s, ops);
> + ops->ordered_samples = false;
Why ? The events injected by perf record itself have no timestamps and
do not need them. So why disabling ordered_samples ?
Thanks,
tglx
Excerpts from Thomas Gleixner's message of Mon Dec 06 20:20:06 +1100 2010:
> See https://lkml.org/lkml/2010/12/5/45
>
> Slightly different, but the same idea :)
Yeah I noted that in my cover email. I meant to get my patches out
before the weekend, but hadn't quite finished cleaning them up to ensure
they didn't break perf when running on a kernel without sample_id_all.
> > + case PERF_RECORD_HEADER_ATTR:
> > + return ops->attr(event, s);
> > + case PERF_RECORD_HEADER_EVENT_TYPE:
> > + return ops->event_type(event, s);
> > + case PERF_RECORD_HEADER_TRACING_DATA:
> > + return ops->tracing_data(event, s);
> > + case PERF_RECORD_HEADER_BUILD_ID:
> > + return ops->build_id(event, s);
>
> These can be processed unordered.
I just moved them into this routine to keep all the dispatching in one
place, whether delayed or not. These particular events will still be
processed immediately when encountered in the file. Only >=
PERF_RECORD_MMAP && <= PERF_RECORD_SAMPLE will be delayed via the
perf_session__process_timed function.
> > + case PERF_RECORD_FINISHED_ROUND:
> > + return ops->finished_round(event, s, ops);
>
> You don't want to delay this unless you have unlimited memory. See my
> patch.
As above, this is not delayed.
> > {
> > + if (ops->ordered_samples && sample->time == -1ULL) {
> > + dump_printf("Event missing timestamp, switching to unordered processing\n");
> > + flush_sample_queue(s, ops);
> > + ops->ordered_samples = false;
>
> Why ? The events injected by perf record itself have no timestamps and
> do not need them. So why disabling ordered_samples ?
For instance, suppose we ran this on an old kernel without support for
timestamps on every event (so timestamps are only on sample events):
perf record -T
perf report
If perf report tried to process the events in order, all the events
without timestamps would be processed first -- including the
PERF_RECORD_EXIT event, which would cause every sample not to be
attributed. Falling back means we should get no worse than the old
behaviour, while an upgraded kernel will provide the timestamps and
should not fall back.
Cheers,
-Ian
On Mon, 6 Dec 2010, Ian Munsie wrote:
> Excerpts from Thomas Gleixner's message of Mon Dec 06 20:20:06 +1100 2010:
> > See https://lkml.org/lkml/2010/12/5/45
> >
> > Slightly different, but the same idea :)
>
> Yeah I noted that in my cover email. I meant to get my patches out
> before the weekend, but hadn't quite finished cleaning them up to ensure
> they didn't break perf when running on a kernel without sample_id_all.
>
> > > + case PERF_RECORD_HEADER_ATTR:
> > > + return ops->attr(event, s);
> > > + case PERF_RECORD_HEADER_EVENT_TYPE:
> > > + return ops->event_type(event, s);
> > > + case PERF_RECORD_HEADER_TRACING_DATA:
> > > + return ops->tracing_data(event, s);
> > > + case PERF_RECORD_HEADER_BUILD_ID:
> > > + return ops->build_id(event, s);
> >
> > These can be processed unordered.
>
> I just moved them into this routine to keep all the dispatching in one
> place, whether delayed or not. These particular events will still be
> processed immediately when encountered in the file. Only >=
> PERF_RECORD_MMAP && <= PERF_RECORD_SAMPLE will be delayed via the
> perf_session__process_timed function.
Gah. This is nasty. I really prefer the explicit split of instant
processed and possibly delayed events. That makes the code clear and
easy to extend. I just want to add a new event type at the right place
and not worry about magic comparisions in some other place.
> > > {
> > > + if (ops->ordered_samples && sample->time == -1ULL) {
> > > + dump_printf("Event missing timestamp, switching to unordered processing\n");
> > > + flush_sample_queue(s, ops);
> > > + ops->ordered_samples = false;
> >
> > Why ? The events injected by perf record itself have no timestamps and
> > do not need them. So why disabling ordered_samples ?
>
> For instance, suppose we ran this on an old kernel without support for
> timestamps on every event (so timestamps are only on sample events):
>
> perf record -T
> perf report
>
> If perf report tried to process the events in order, all the events
> without timestamps would be processed first -- including the
> PERF_RECORD_EXIT event, which would cause every sample not to be
> attributed. Falling back means we should get no worse than the old
> behaviour, while an upgraded kernel will provide the timestamps and
> should not fall back.
Ok, but you'll break existing code which does only care about sample
ordering if you do that at the session level unconditionally.
Thanks,
tglx
Excerpts from Thomas Gleixner's message of Tue Dec 07 00:04:20 +1100 2010:
> > I just moved them into this routine to keep all the dispatching in one
> > place, whether delayed or not. These particular events will still be
> > processed immediately when encountered in the file. Only >=
> > PERF_RECORD_MMAP && <= PERF_RECORD_SAMPLE will be delayed via the
> > perf_session__process_timed function.
>
> Gah. This is nasty. I really prefer the explicit split of instant
> processed and possibly delayed events. That makes the code clear and
> easy to extend. I just want to add a new event type at the right place
> and not worry about magic comparisions in some other place.
Fair enough, I'll split them back out.
> > For instance, suppose we ran this on an old kernel without support for
> > timestamps on every event (so timestamps are only on sample events):
> >
> > perf record -T
> > perf report
> >
> > If perf report tried to process the events in order, all the events
> > without timestamps would be processed first -- including the
> > PERF_RECORD_EXIT event, which would cause every sample not to be
> > attributed. Falling back means we should get no worse than the old
> > behaviour, while an upgraded kernel will provide the timestamps and
> > should not fall back.
>
> Ok, but you'll break existing code which does only care about sample
> ordering if you do that at the session level unconditionally.
Good point. I'll give this some thought overnight.
Cheers,
-Ian
On Tue, 7 Dec 2010, Ian Munsie wrote:
> Excerpts from Thomas Gleixner's message of Tue Dec 07 00:04:20 +1100 2010:
> > > I just moved them into this routine to keep all the dispatching in one
> > > place, whether delayed or not. These particular events will still be
> > > processed immediately when encountered in the file. Only >=
> > > PERF_RECORD_MMAP && <= PERF_RECORD_SAMPLE will be delayed via the
> > > perf_session__process_timed function.
> >
> > Gah. This is nasty. I really prefer the explicit split of instant
> > processed and possibly delayed events. That makes the code clear and
> > easy to extend. I just want to add a new event type at the right place
> > and not worry about magic comparisions in some other place.
>
> Fair enough, I'll split them back out.
>
> > > For instance, suppose we ran this on an old kernel without support for
> > > timestamps on every event (so timestamps are only on sample events):
> > >
> > > perf record -T
> > > perf report
> > >
> > > If perf report tried to process the events in order, all the events
> > > without timestamps would be processed first -- including the
> > > PERF_RECORD_EXIT event, which would cause every sample not to be
> > > attributed. Falling back means we should get no worse than the old
> > > behaviour, while an upgraded kernel will provide the timestamps and
> > > should not fall back.
> >
> > Ok, but you'll break existing code which does only care about sample
> > ordering if you do that at the session level unconditionally.
>
> Good point. I'll give this some thought overnight.
Does the patch below on top of my previous one work for you ?
Thanks,
tglx
---
tools/perf/util/session.c | 70 ++++++++++++++++++++++++++++------------------
tools/perf/util/session.h | 1
2 files changed, 44 insertions(+), 27 deletions(-)
Index: linux-2.6-tip/tools/perf/util/session.c
===================================================================
--- linux-2.6-tip.orig/tools/perf/util/session.c
+++ linux-2.6-tip/tools/perf/util/session.c
@@ -709,6 +709,29 @@ static int perf_session_deliver_event(st
}
}
+static int preprocess_sample_record(struct perf_session *session,
+ event_t *event, struct sample_data *sample)
+{
+ if (event->header.type != PERF_RECORD_SAMPLE)
+ return 0;
+
+ dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
+ sample->pid, sample->tid, sample->ip, sample->period);
+
+ if (!(session->sample_type & PERF_SAMPLE_CALLCHAIN))
+ return 0;
+
+ if (!ip_callchain__valid(sample->callchain, event)) {
+ pr_debug("call-chain problem with event, skipping it.\n");
+ ++session->hists.stats.nr_invalid_chains;
+ session->hists.stats.total_invalid_chains += sample->period;
+ return -EINVAL;
+ }
+
+ callchain__dump(sample);
+ return 0;
+}
+
static int perf_session__process_event(struct perf_session *session,
event_t *event,
struct perf_event_ops *ops,
@@ -722,13 +745,6 @@ static int perf_session__process_event(s
if (session->header.needs_swap && event__swap_ops[event->header.type])
event__swap_ops[event->header.type](event);
- if (event->header.type >= PERF_RECORD_MMAP &&
- event->header.type <= PERF_RECORD_SAMPLE) {
- event__parse_sample(event, session, &sample);
- if (dump_trace)
- perf_session__print_tstamp(session, event, &sample);
- }
-
if (event->header.type < PERF_RECORD_HEADER_MAX) {
dump_printf("%#Lx [%#x]: PERF_RECORD_%s",
file_offset, event->header.size,
@@ -738,27 +754,16 @@ static int perf_session__process_event(s
/* These events are processed right away */
switch (event->header.type) {
- case PERF_RECORD_SAMPLE:
- dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n",
- event->header.misc,
- sample.pid, sample.tid, sample.ip, sample.period);
-
- if (session->sample_type & PERF_SAMPLE_CALLCHAIN) {
- if (!ip_callchain__valid(sample.callchain, event)) {
- pr_debug("call-chain problem with event, "
- "skipping it.\n");
- ++session->hists.stats.nr_invalid_chains;
- session->hists.stats.total_invalid_chains +=
- sample.period;
- return 0;
- }
-
- callchain__dump(&sample);
- }
- break;
-
case PERF_RECORD_HEADER_ATTR:
- return ops->attr(event, session);
+ /* This updates session->sample_id_all */
+ ret = ops->attr(event, session);
+ if (ops->ordering_requires_timestamps &&
+ ops->ordered_samples && !session->sample_id_all) {
+ session->ordered_samples.next_flush = ULLONG_MAX;
+ flush_sample_queue(session, ops);
+ ops->ordered_samples = false;
+ }
+ return ret;
case PERF_RECORD_HEADER_EVENT_TYPE:
return ops->event_type(event, session);
case PERF_RECORD_HEADER_TRACING_DATA:
@@ -773,6 +778,17 @@ static int perf_session__process_event(s
break;
}
+ /*
+ * For all non synthetized events we get the sample data
+ */
+ event__parse_sample(event, session, &sample);
+ if (dump_trace)
+ perf_session__print_tstamp(session, event, &sample);
+
+ /* Preprocess sample records - precheck callchains */
+ if (preprocess_sample_record(session, event, &sample))
+ return 0;
+
if (ops->ordered_samples) {
ret = perf_session_queue_event(session, event, &sample);
if (ret != -ETIME)
Index: linux-2.6-tip/tools/perf/util/session.h
===================================================================
--- linux-2.6-tip.orig/tools/perf/util/session.h
+++ linux-2.6-tip/tools/perf/util/session.h
@@ -82,6 +82,7 @@ struct perf_event_ops {
build_id;
event_op2 finished_round;
bool ordered_samples;
+ bool ordering_requires_timestamps;
};
struct perf_session *perf_session__new(const char *filename, int mode, bool force, bool repipe);
Hi Thomas,
Excerpts from Thomas Gleixner's message of Tue Dec 07 01:41:22 +1100 2010:
> Does the patch below on top of my previous one work for you ?
Not quite. It does work on a patched kernel, but has some problems on
unpatched kernels and issues with perf report -D.
I set ordered_samples and ordering_requires_timestamps in perf report to
test this (I've included the changes below). Since you don't move the
dump_printfs like my patches, we can't set those flags unconditionally
in perf report, otherwise perf report -D would be completely broken.
That's OK, we can just set it conditionally on !dump_trace and report -D
will then always display events in file order. Personally I think it's
handy to see which order events are being processed in to track down why
samples aren't being attributed, but that's mostly important during
debugging the timestamp ordering, and perhaps not as useful otherwise.
> case PERF_RECORD_HEADER_ATTR:
> - return ops->attr(event, session);
> + /* This updates session->sample_id_all */
> + ret = ops->attr(event, session);
> + if (ops->ordering_requires_timestamps &&
> + ops->ordered_samples && !session->sample_id_all) {
> + session->ordered_samples.next_flush = ULLONG_MAX;
> + flush_sample_queue(session, ops);
> + ops->ordered_samples = false;
> + }
Makes sense. I did something similar in the report layer that I was
about to send when I saw this email, but this way we have a generic
solution for other parts of perf that might want this.
The problem here is that we only get the PERF_RECORD_HEADER_ATTR if perf
record has been piped somewhere, so running perf record <load>; perf
report on an unpatched kernel results in the COMM, MMAP, etc events
being processed last (timestamp -1ULL) and no userspace samples are
attributed at all:
$ ./perf report|cat
# Events: 1K cycles
#
# Overhead Command Shared Object Symbol
# ........ ....... ............. .....................
#
99.42% cachetest [unknown] [.] 0xf75e0ccb
0.45% cachetest [kernel] [k] page_fault
0.06% cachetest [kernel] [k] hpet_next_event
0.06% cachetest [kernel] [k] run_timer_softirq
0.01% cachetest [kernel] [k] flush_signal_handlers
0.00% cachetest [kernel] [k] native_write_msr_safe
> + /*
> + * For all non synthetized events we get the sample data
s/synthetized/synthesized/
> + event__parse_sample(event, session, &sample);
> + if (dump_trace)
> + perf_session__print_tstamp(session, event, &sample);
Moving this here after the dump_printf("%#Lx [%#x]: PERF_RECORD_%s"...
changes the output of perf report -D in a bad way. Changing the spacing
in dump_printf can make up for it, or juggle the code around some more.
How do you want to proceed? At this point either version of the patches
are pretty functionally equivelant. Mine does the perf report -D
reordering as well, but that isn't really necessary to solve the bug.
Either way we only have a few minor fixups left.
If we want to use yours, here's my v3 patch respun on top of them to
enable ordering by timestamps in record & report:
>From 581eb3fbad176a83d1830a792174ff5087015897 Mon Sep 17 00:00:00 2001
From: Ian Munsie <[email protected]>
Date: Tue, 7 Dec 2010 10:20:44 +1100
Subject: [PATCH v3] perf record/report: Process events in order
This patch changes perf report to ask for the ID info on all events by
default if recording from multiple CPUs.
Perf report (without -D) will now process the events in order if the
kernel is able to provide timestamps on all events. This ensures that
events such as COMM and MMAP which are necessary to correctly interpret
samples are processed prior to those samples so that they are attributed
correctly.
Before:
# perf record ./cachetest
# perf report
# Events: 6K cycles
#
# Overhead Command Shared Object Symbol
# ........ ....... ................. ...............................
#
74.11% :3259 [unknown] [k] 0x4a6c
1.50% cachetest ld-2.11.2.so [.] 0x1777c
1.46% :3259 [kernel.kallsyms] [k] .perf_event_mmap_ctx
1.25% :3259 [kernel.kallsyms] [k] restore
0.74% :3259 [kernel.kallsyms] [k] ._raw_spin_lock
0.71% :3259 [kernel.kallsyms] [k] .filemap_fault
0.66% :3259 [kernel.kallsyms] [k] .memset
0.54% cachetest [kernel.kallsyms] [k] .sha_transform
0.54% :3259 [kernel.kallsyms] [k] .copy_4K_page
0.54% :3259 [kernel.kallsyms] [k] .find_get_page
0.52% :3259 [kernel.kallsyms] [k] .trace_hardirqs_off
0.50% :3259 [kernel.kallsyms] [k] .__do_fault
<SNIP>
After:
# perf report
# Events: 6K cycles
#
# Overhead Command Shared Object Symbol
# ........ ....... ................. ...............................
#
44.28% cachetest cachetest [.] sumArrayNaive
22.53% cachetest cachetest [.] sumArrayOptimal
6.59% cachetest ld-2.11.2.so [.] 0x1777c
2.13% cachetest [unknown] [k] 0x340
1.46% cachetest [kernel.kallsyms] [k] .perf_event_mmap_ctx
1.25% cachetest [kernel.kallsyms] [k] restore
0.74% cachetest [kernel.kallsyms] [k] ._raw_spin_lock
0.71% cachetest [kernel.kallsyms] [k] .filemap_fault
0.66% cachetest [kernel.kallsyms] [k] .memset
0.54% cachetest [kernel.kallsyms] [k] .copy_4K_page
0.54% cachetest [kernel.kallsyms] [k] .find_get_page
0.54% cachetest [kernel.kallsyms] [k] .sha_transform
0.52% cachetest [kernel.kallsyms] [k] .trace_hardirqs_off
0.50% cachetest [kernel.kallsyms] [k] .__do_fault
<SNIP>
Signed-off-by: Ian Munsie <[email protected]>
---
tools/perf/builtin-record.c | 5 ++++-
tools/perf/builtin-report.c | 5 +++++
2 files changed, 9 insertions(+), 1 deletions(-)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 699dd21..310dd21 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -285,7 +285,7 @@ static void create_counter(int counter, int cpu)
if (system_wide)
attr->sample_type |= PERF_SAMPLE_CPU;
- if (sample_time)
+ if (sample_time || system_wide || !no_inherit || cpu_list)
attr->sample_type |= PERF_SAMPLE_TIME;
if (raw_samples) {
@@ -327,6 +327,9 @@ try_again:
* Old kernel, no attr->sample_id_type_all field
*/
sample_id_all_avail = false;
+ if (!sample_time && !raw_samples)
+ attr->sample_type &= ~PERF_SAMPLE_TIME;
+
goto retry_sample_id;
}
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 904519f..d1ad782 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -244,6 +244,8 @@ static struct perf_event_ops event_ops = {
.event_type = event__process_event_type,
.tracing_data = event__process_tracing_data,
.build_id = event__process_build_id,
+ /* { .ordered_samples && !dump_trace } */
+ /* { .ordering_requires_timestamps && !dump_trace } */
};
extern volatile int session_done;
@@ -319,6 +321,9 @@ static int __cmd_report(void)
if (ret)
goto out_delete;
+ event_ops.ordered_samples = event_ops.ordering_requires_timestamps =
+ !dump_trace;
+
ret = perf_session__process_events(session, &event_ops);
if (ret)
goto out_delete;
--
1.7.2.3
Commit-ID: 1437a30aae865d83c7d96e3401f503a73fffe14d
Gitweb: http://git.kernel.org/tip/1437a30aae865d83c7d96e3401f503a73fffe14d
Author: Ian Munsie <[email protected]>
AuthorDate: Mon, 6 Dec 2010 13:37:04 +1100
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Mon, 6 Dec 2010 15:12:34 -0200
perf hist: Better displaying of unresolved DSOs and symbols
In the event that a DSO has not been identified, just print out [unknown]
instead of the instruction pointer as we previously were doing, which is pretty
meaningless for a shared object (at least from the user's perspective).
The IP we print out is fairly meaningless in general anyway - it's just one
(the first) of the many addresses that were lumped together as unidentified,
and could span many shared objects and symbols. In reality if we see this
[unknown] output then the report -D output is going to be more useful anyway as
we can see all the different addresses that it represents.
If we are printing the symbols we are still going to see this IP in that column
anyway since they shouldn't resolve either.
This patch also changes the symbol address printouts so that they print out 0x
before the address, are left aligned, and changes the %L format string (which
relies on a glibc bug) to %ll.
Before:
74.11% :3259 4a6c [k] 4a6c
After:
74.11% :3259 [unknown] [k] 0x4a6c
Cc: Frederic Weisbecker <[email protected]>
Cc: Mike Galbraith <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Stephane Eranian <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ian Munsie <[email protected]>
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/util/sort.c | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index b62a553..f44fa54 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -170,7 +170,7 @@ static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf,
return repsep_snprintf(bf, size, "%-*s", width, dso_name);
}
- return repsep_snprintf(bf, size, "%*Lx", width, self->ip);
+ return repsep_snprintf(bf, size, "%-*s", width, "[unknown]");
}
/* --sort symbol */
@@ -196,7 +196,7 @@ static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
if (verbose) {
char o = self->ms.map ? dso__symtab_origin(self->ms.map->dso) : '!';
- ret += repsep_snprintf(bf, size, "%*Lx %c ",
+ ret += repsep_snprintf(bf, size, "%-#*llx %c ",
BITS_PER_LONG / 4, self->ip, o);
}
@@ -205,7 +205,7 @@ static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
ret += repsep_snprintf(bf + ret, size - ret, "%s",
self->ms.sym->name);
else
- ret += repsep_snprintf(bf + ret, size - ret, "%*Lx",
+ ret += repsep_snprintf(bf + ret, size - ret, "%-#*llx",
BITS_PER_LONG / 4, self->ip);
return ret;
Commit-ID: cbf41645f35224798cb61641766e6a16e141ffe4
Gitweb: http://git.kernel.org/tip/cbf41645f35224798cb61641766e6a16e141ffe4
Author: Thomas Gleixner <[email protected]>
AuthorDate: Sun, 5 Dec 2010 14:32:55 +0100
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Mon, 6 Dec 2010 15:43:00 -0200
perf session: Sort all events if ordered_samples=true
Now that we have timestamps on FORK, EXIT, COMM, MMAP events we can
sort everything in time order. This fixes the following observed
problem:
mmap(file1) -> pagefault() -> munmap(file1)
mmap(file2) -> pagefault() -> munmap(file2)
Resulted in decoding both pagefaults in file2 because the file1 map
was already replaced by the file2 map when the map address was
identical.
With all events sorted we decode both pagefaults correctly.
Cc: Frederic Weisbecker <[email protected]>
Cc: Ian Munsie <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Mike Galbraith <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Stephane Eranian <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/util/session.c | 125 ++++++++++++++++++++++++++-------------------
1 files changed, 72 insertions(+), 53 deletions(-)
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 5c75660..3074d38 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -461,6 +461,11 @@ static void perf_session_free_sample_buffers(struct perf_session *session)
}
}
+static int perf_session_deliver_event(struct perf_session *session,
+ event_t *event,
+ struct sample_data *sample,
+ struct perf_event_ops *ops);
+
static void flush_sample_queue(struct perf_session *s,
struct perf_event_ops *ops)
{
@@ -479,7 +484,7 @@ static void flush_sample_queue(struct perf_session *s,
break;
event__parse_sample(iter->event, s, &sample);
- ops->sample(iter->event, &sample, s);
+ perf_session_deliver_event(s, iter->event, &sample, ops);
os->last_flush = iter->timestamp;
list_del(&iter->list);
@@ -544,8 +549,7 @@ static int process_finished_round(event_t *event __used,
}
/* The queue is ordered by time */
-static void __queue_sample_event(struct sample_queue *new,
- struct perf_session *s)
+static void __queue_event(struct sample_queue *new, struct perf_session *s)
{
struct ordered_samples *os = &s->ordered_samples;
struct sample_queue *sample = os->last_sample;
@@ -591,14 +595,17 @@ static void __queue_sample_event(struct sample_queue *new,
#define MAX_SAMPLE_BUFFER (64 * 1024 / sizeof(struct sample_queue))
-static int queue_sample_event(event_t *event, struct sample_data *data,
- struct perf_session *s)
+static int perf_session_queue_event(struct perf_session *s, event_t *event,
+ struct sample_data *data)
{
struct ordered_samples *os = &s->ordered_samples;
struct list_head *sc = &os->sample_cache;
u64 timestamp = data->time;
struct sample_queue *new;
+ if (!timestamp)
+ return -ETIME;
+
if (timestamp < s->ordered_samples.last_flush) {
printf("Warning: Timestamp below last timeslice flush\n");
return -EINVAL;
@@ -623,20 +630,8 @@ static int queue_sample_event(event_t *event, struct sample_data *data,
new->timestamp = timestamp;
new->event = event;
- __queue_sample_event(new, s);
-
- return 0;
-}
-
-static int perf_session__process_sample(event_t *event,
- struct sample_data *sample,
- struct perf_session *s,
- struct perf_event_ops *ops)
-{
- if (!ops->ordered_samples)
- return ops->sample(event, sample, s);
+ __queue_event(new, s);
- queue_sample_event(event, sample, s);
return 0;
}
@@ -670,83 +665,107 @@ static void perf_session__print_tstamp(struct perf_session *session,
printf("%Lu ", sample->time);
}
-static int perf_session__process_event(struct perf_session *self,
+static int perf_session_deliver_event(struct perf_session *session,
+ event_t *event,
+ struct sample_data *sample,
+ struct perf_event_ops *ops)
+{
+ switch (event->header.type) {
+ case PERF_RECORD_SAMPLE:
+ return ops->sample(event, sample, session);
+ case PERF_RECORD_MMAP:
+ return ops->mmap(event, sample, session);
+ case PERF_RECORD_COMM:
+ return ops->comm(event, sample, session);
+ case PERF_RECORD_FORK:
+ return ops->fork(event, sample, session);
+ case PERF_RECORD_EXIT:
+ return ops->exit(event, sample, session);
+ case PERF_RECORD_LOST:
+ return ops->lost(event, sample, session);
+ case PERF_RECORD_READ:
+ return ops->read(event, sample, session);
+ case PERF_RECORD_THROTTLE:
+ return ops->throttle(event, sample, session);
+ case PERF_RECORD_UNTHROTTLE:
+ return ops->unthrottle(event, sample, session);
+ default:
+ ++session->hists.stats.nr_unknown_events;
+ return -1;
+ }
+}
+
+static int perf_session__process_event(struct perf_session *session,
event_t *event,
struct perf_event_ops *ops,
u64 file_offset)
{
struct sample_data sample;
+ int ret;
trace_event(event);
- if (self->header.needs_swap && event__swap_ops[event->header.type])
+ if (session->header.needs_swap && event__swap_ops[event->header.type])
event__swap_ops[event->header.type](event);
if (event->header.type >= PERF_RECORD_MMAP &&
event->header.type <= PERF_RECORD_SAMPLE) {
- event__parse_sample(event, self, &sample);
+ event__parse_sample(event, session, &sample);
if (dump_trace)
- perf_session__print_tstamp(self, event, &sample);
+ perf_session__print_tstamp(session, event, &sample);
}
if (event->header.type < PERF_RECORD_HEADER_MAX) {
dump_printf("%#Lx [%#x]: PERF_RECORD_%s",
file_offset, event->header.size,
event__name[event->header.type]);
- hists__inc_nr_events(&self->hists, event->header.type);
+ hists__inc_nr_events(&session->hists, event->header.type);
}
+ /* These events are processed right away */
switch (event->header.type) {
case PERF_RECORD_SAMPLE:
- dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
+ dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n",
+ event->header.misc,
sample.pid, sample.tid, sample.ip, sample.period);
- if (self->sample_type & PERF_SAMPLE_CALLCHAIN) {
+ if (session->sample_type & PERF_SAMPLE_CALLCHAIN) {
if (!ip_callchain__valid(sample.callchain, event)) {
pr_debug("call-chain problem with event, "
"skipping it.\n");
- ++self->hists.stats.nr_invalid_chains;
- self->hists.stats.total_invalid_chains += sample.period;
+ ++session->hists.stats.nr_invalid_chains;
+ session->hists.stats.total_invalid_chains +=
+ sample.period;
return 0;
}
callchain__dump(&sample);
}
+ break;
- return perf_session__process_sample(event, &sample, self, ops);
-
- case PERF_RECORD_MMAP:
- return ops->mmap(event, &sample, self);
- case PERF_RECORD_COMM:
- return ops->comm(event, &sample, self);
- case PERF_RECORD_FORK:
- return ops->fork(event, &sample, self);
- case PERF_RECORD_EXIT:
- return ops->exit(event, &sample, self);
- case PERF_RECORD_LOST:
- return ops->lost(event, &sample, self);
- case PERF_RECORD_READ:
- return ops->read(event, &sample, self);
- case PERF_RECORD_THROTTLE:
- return ops->throttle(event, &sample, self);
- case PERF_RECORD_UNTHROTTLE:
- return ops->unthrottle(event, &sample, self);
case PERF_RECORD_HEADER_ATTR:
- return ops->attr(event, self);
+ return ops->attr(event, session);
case PERF_RECORD_HEADER_EVENT_TYPE:
- return ops->event_type(event, self);
+ return ops->event_type(event, session);
case PERF_RECORD_HEADER_TRACING_DATA:
/* setup for reading amidst mmap */
- lseek(self->fd, file_offset, SEEK_SET);
- return ops->tracing_data(event, self);
+ lseek(session->fd, file_offset, SEEK_SET);
+ return ops->tracing_data(event, session);
case PERF_RECORD_HEADER_BUILD_ID:
- return ops->build_id(event, self);
+ return ops->build_id(event, session);
case PERF_RECORD_FINISHED_ROUND:
- return ops->finished_round(event, self, ops);
+ return ops->finished_round(event, session, ops);
default:
- ++self->hists.stats.nr_unknown_events;
- return -1;
+ break;
}
+
+ if (ops->ordered_samples) {
+ ret = perf_session_queue_event(session, event, &sample);
+ if (ret != -ETIME)
+ return ret;
+ }
+
+ return perf_session_deliver_event(session, event, &sample, ops);
}
void perf_event_header__bswap(struct perf_event_header *self)
Ian,
On Tue, 7 Dec 2010, Ian Munsie wrote:
> > case PERF_RECORD_HEADER_ATTR:
> > - return ops->attr(event, session);
> > + /* This updates session->sample_id_all */
> > + ret = ops->attr(event, session);
> > + if (ops->ordering_requires_timestamps &&
> > + ops->ordered_samples && !session->sample_id_all) {
> > + session->ordered_samples.next_flush = ULLONG_MAX;
> > + flush_sample_queue(session, ops);
> > + ops->ordered_samples = false;
> > + }
>
> Makes sense. I did something similar in the report layer that I was
> about to send when I saw this email, but this way we have a generic
> solution for other parts of perf that might want this.
> The problem here is that we only get the PERF_RECORD_HEADER_ATTR if perf
> record has been piped somewhere, so running perf record <load>; perf
> report on an unpatched kernel results in the COMM, MMAP, etc events
> being processed last (timestamp -1ULL) and no userspace samples are
> attributed at all:
Ok. We need to treat timestamp ~0ULL the same as timestamp 0ULL then.
> > + event__parse_sample(event, session, &sample);
> > + if (dump_trace)
> > + perf_session__print_tstamp(session, event, &sample);
>
> Moving this here after the dump_printf("%#Lx [%#x]: PERF_RECORD_%s"...
> changes the output of perf report -D in a bad way. Changing the spacing
> in dump_printf can make up for it, or juggle the code around some more.
Crap. I wanted to restrict the sample parsing to the real events w/o
having this magic comparison in place as we filter out the synth stuff
in the switch case already.
> How do you want to proceed? At this point either version of the patches
> are pretty functionally equivalent. Mine does the perf report -D
Hmm. Arnaldo merged my version already.
> reordering as well, but that isn't really necessary to solve the bug.
> Either way we only have a few minor fixups left.
Having time ordered output of -D needs more than fixing the time stamp
issue. The dump_printf/dump_trace stuff is scattered all over the
place. So that needs more code churn, as you want to output the non
synth events when the ordered queue is drained.
Will send an update with that solved soon.
Thanks,
tglx
Em Tue, Dec 07, 2010 at 11:47:35AM +0100, Thomas Gleixner escreveu:
> On Tue, 7 Dec 2010, Ian Munsie wrote:
> > Makes sense. I did something similar in the report layer that I was
> > about to send when I saw this email, but this way we have a generic
> > solution for other parts of perf that might want this.
> > The problem here is that we only get the PERF_RECORD_HEADER_ATTR if perf
> > record has been piped somewhere, so running perf record <load>; perf
> > report on an unpatched kernel results in the COMM, MMAP, etc events
> > being processed last (timestamp -1ULL) and no userspace samples are
> > attributed at all:
>
> Ok. We need to treat timestamp ~0ULL the same as timestamp 0ULL then.
Right.
> > > + event__parse_sample(event, session, &sample);
> > > + if (dump_trace)
> > > + perf_session__print_tstamp(session, event, &sample);
> >
> > Moving this here after the dump_printf("%#Lx [%#x]: PERF_RECORD_%s"...
> > changes the output of perf report -D in a bad way. Changing the spacing
> > in dump_printf can make up for it, or juggle the code around some more.
>
> Crap. I wanted to restrict the sample parsing to the real events w/o
> having this magic comparison in place as we filter out the synth stuff
> in the switch case already.
>
> > How do you want to proceed? At this point either version of the patches
> > are pretty functionally equivalent. Mine does the perf report -D
>
> Hmm. Arnaldo merged my version already.
>
> > reordering as well, but that isn't really necessary to solve the bug.
> > Either way we only have a few minor fixups left.
>
> Having time ordered output of -D needs more than fixing the time stamp
> issue. The dump_printf/dump_trace stuff is scattered all over the
> place. So that needs more code churn, as you want to output the non
> synth events when the ordered queue is drained.
We can fix that, but then it was supposed to be a dump, something as it
comes from the perf.data file.
Perhaps we need something new, that does ordered dumps, I think we can
just move all the dump_fprintf stuff to one place, and have calls for
those where it is now (unordered) and another one from the ordered
place, but looking at different debug variables?
- Arnaldo
On Tue, 7 Dec 2010, Arnaldo Carvalho de Melo wrote:
> Em Tue, Dec 07, 2010 at 11:47:35AM +0100, Thomas Gleixner escreveu:
> > Having time ordered output of -D needs more than fixing the time stamp
> > issue. The dump_printf/dump_trace stuff is scattered all over the
> > place. So that needs more code churn, as you want to output the non
> > synth events when the ordered queue is drained.
>
> We can fix that, but then it was supposed to be a dump, something as it
> comes from the perf.data file.
>
> Perhaps we need something new, that does ordered dumps, I think we can
> just move all the dump_fprintf stuff to one place, and have calls for
> those where it is now (unordered) and another one from the ordered
> place, but looking at different debug variables?
I have a patch series ready to consolidate all this. Will send soon.
Thanks,
tglx