Hi Ingo,
Please pull from:
git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux-2.6 perf/core
Regards,
- Arnaldo
Arnaldo Carvalho de Melo (2):
perf events: Precalculate the header space for PERF_SAMPLE_ fields
perf debug: Simplify trace_event
Thomas Gleixner (9):
perf session: Fix list sort algorithm
perf session: Use appropriate pointer type instead of silly typecasting
perf session: Cleanup __perf_session__process_events()
perf session: Move ui_progress_update in __perf_session__process_events()
perf session: Simplify termination checks
perf session: Use sensible mmap size
perf session: Keep file mmaped instead of malloc/memcpy
perf session: Cache sample objects
perf session: Allocate chunks of sample objects
include/linux/perf_event.h | 2 +
kernel/perf_event.c | 150 ++++++++++++++---------
tools/perf/util/debug.c | 41 ++-----
tools/perf/util/session.c | 289 ++++++++++++++++++++++++--------------------
tools/perf/util/session.h | 8 +-
5 files changed, 269 insertions(+), 221 deletions(-)
From: Thomas Gleixner <[email protected]>
The homebrewed sort algorithm fails to sort in time order. One of the problem
spots is that it does not deal with equal timestamps correctly.
My first gut reaction was to replace the fancy list with an rbtree, but its
performance turned out to be 3 times worse.
Rewrite it so it works.
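Condensed, for illustration only (simplified names, not the actual tools/perf
helpers), the rewritten strategy keeps a hint to the last queued sample and
walks the time-ordered list forward or backward from there, treating equal
timestamps as "insert after" so ordering stays stable:

	struct node {
		unsigned long long ts;
		struct node *prev, *next; /* circular list, head is a sentinel */
	};

	static void insert_after(struct node *pos, struct node *new)
	{
		new->prev = pos;
		new->next = pos->next;
		pos->next->prev = new;
		pos->next = new;
	}

	/* head: list anchor; hint: last inserted node or NULL */
	static void queue_sample(struct node *head, struct node *hint,
				 struct node *new)
	{
		struct node *pos = hint ? hint : head;

		if (pos == head || pos->ts <= new->ts) {
			/* walk forward past equal-or-older timestamps */
			while (pos->next != head && pos->next->ts <= new->ts)
				pos = pos->next;
			insert_after(pos, new);
		} else {
			/* walk backward to the first equal-or-older one */
			while (pos->prev != head && pos->prev->ts > new->ts)
				pos = pos->prev;
			insert_after(pos->prev, new);
		}
	}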
Cc: Ingo Molnar <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/util/session.c | 113 +++++++++++++++++++--------------------------
tools/perf/util/session.h | 4 +-
2 files changed, 49 insertions(+), 68 deletions(-)
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 3ae6955..daca557 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -104,7 +104,7 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc
self->mmap_window = 32;
self->machines = RB_ROOT;
self->repipe = repipe;
- INIT_LIST_HEAD(&self->ordered_samples.samples_head);
+ INIT_LIST_HEAD(&self->ordered_samples.samples);
machine__init(&self->host_machine, "", HOST_KERNEL_ID);
if (mode == O_RDONLY) {
@@ -393,27 +393,33 @@ struct sample_queue {
static void flush_sample_queue(struct perf_session *s,
struct perf_event_ops *ops)
{
- struct list_head *head = &s->ordered_samples.samples_head;
- u64 limit = s->ordered_samples.next_flush;
+ struct ordered_samples *os = &s->ordered_samples;
+ struct list_head *head = &os->samples;
struct sample_queue *tmp, *iter;
+ u64 limit = os->next_flush;
+ u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL;
if (!ops->ordered_samples || !limit)
return;
list_for_each_entry_safe(iter, tmp, head, list) {
if (iter->timestamp > limit)
- return;
-
- if (iter == s->ordered_samples.last_inserted)
- s->ordered_samples.last_inserted = NULL;
+ break;
ops->sample((event_t *)iter->event, s);
- s->ordered_samples.last_flush = iter->timestamp;
+ os->last_flush = iter->timestamp;
list_del(&iter->list);
free(iter->event);
free(iter);
}
+
+ if (list_empty(head)) {
+ os->last_sample = NULL;
+ } else if (last_ts <= limit) {
+ os->last_sample =
+ list_entry(head->prev, struct sample_queue, list);
+ }
}
/*
@@ -465,71 +471,50 @@ static int process_finished_round(event_t *event __used,
return 0;
}
-static void __queue_sample_end(struct sample_queue *new, struct list_head *head)
-{
- struct sample_queue *iter;
-
- list_for_each_entry_reverse(iter, head, list) {
- if (iter->timestamp < new->timestamp) {
- list_add(&new->list, &iter->list);
- return;
- }
- }
-
- list_add(&new->list, head);
-}
-
-static void __queue_sample_before(struct sample_queue *new,
- struct sample_queue *iter,
- struct list_head *head)
-{
- list_for_each_entry_continue_reverse(iter, head, list) {
- if (iter->timestamp < new->timestamp) {
- list_add(&new->list, &iter->list);
- return;
- }
- }
-
- list_add(&new->list, head);
-}
-
-static void __queue_sample_after(struct sample_queue *new,
- struct sample_queue *iter,
- struct list_head *head)
-{
- list_for_each_entry_continue(iter, head, list) {
- if (iter->timestamp > new->timestamp) {
- list_add_tail(&new->list, &iter->list);
- return;
- }
- }
- list_add_tail(&new->list, head);
-}
-
/* The queue is ordered by time */
static void __queue_sample_event(struct sample_queue *new,
struct perf_session *s)
{
- struct sample_queue *last_inserted = s->ordered_samples.last_inserted;
- struct list_head *head = &s->ordered_samples.samples_head;
+ struct ordered_samples *os = &s->ordered_samples;
+ struct sample_queue *sample = os->last_sample;
+ u64 timestamp = new->timestamp;
+ struct list_head *p;
+ os->last_sample = new;
- if (!last_inserted) {
- __queue_sample_end(new, head);
+ if (!sample) {
+ list_add(&new->list, &os->samples);
+ os->max_timestamp = timestamp;
return;
}
/*
- * Most of the time the current event has a timestamp
- * very close to the last event inserted, unless we just switched
- * to another event buffer. Having a sorting based on a list and
- * on the last inserted event that is close to the current one is
- * probably more efficient than an rbtree based sorting.
+ * last_sample might point to some random place in the list as it's
+ * the last queued event. We expect that the new event is close to
+ * this.
*/
- if (last_inserted->timestamp >= new->timestamp)
- __queue_sample_before(new, last_inserted, head);
- else
- __queue_sample_after(new, last_inserted, head);
+ if (sample->timestamp <= timestamp) {
+ while (sample->timestamp <= timestamp) {
+ p = sample->list.next;
+ if (p == &os->samples) {
+ list_add_tail(&new->list, &os->samples);
+ os->max_timestamp = timestamp;
+ return;
+ }
+ sample = list_entry(p, struct sample_queue, list);
+ }
+ list_add_tail(&new->list, &sample->list);
+ } else {
+ while (sample->timestamp > timestamp) {
+ p = sample->list.prev;
+ if (p == &os->samples) {
+ list_add(&new->list, &os->samples);
+ return;
+ }
+ sample = list_entry(p, struct sample_queue, list);
+ }
+ list_add(&new->list, &sample->list);
+ }
}
static int queue_sample_event(event_t *event, struct sample_data *data,
@@ -559,10 +544,6 @@ static int queue_sample_event(event_t *event, struct sample_data *data,
memcpy(new->event, event, event->header.size);
__queue_sample_event(new, s);
- s->ordered_samples.last_inserted = new;
-
- if (new->timestamp > s->ordered_samples.max_timestamp)
- s->ordered_samples.max_timestamp = new->timestamp;
return 0;
}
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 9fa0fc2..a00f32e 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -17,8 +17,8 @@ struct ordered_samples {
u64 last_flush;
u64 next_flush;
u64 max_timestamp;
- struct list_head samples_head;
- struct sample_queue *last_inserted;
+ struct list_head samples;
+ struct sample_queue *last_sample;
};
struct perf_session {
--
1.6.2.5
From: Arnaldo Carvalho de Melo <[email protected]>
PERF_SAMPLE_{CALLCHAIN,RAW} have variable lengths per sample, but the others
can be precalculated, reducing the per-sample cost a bit.
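A rough worked example (assuming the usual u64-based sample layout; the field
names follow struct perf_sample_data):

	/*
	 * sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME
	 *
	 *   ip        : sizeof(u64)     =  8 bytes
	 *   tid_entry : 2 * sizeof(u32) =  8 bytes
	 *   time      : sizeof(u64)     =  8 bytes
	 *                 header_size   = 24 bytes
	 *
	 * perf_prepare_sample() then only adds sizeof(*header) plus the
	 * variable-length CALLCHAIN/RAW parts at sample time.
	 */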
Acked-by: Peter Zijlstra <[email protected]>
Cc: Frédéric Weisbecker <[email protected]>
Cc: Ian Munsie <[email protected]>
Cc: Mike Galbraith <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Stephane Eranian <[email protected]>
LKML-Reference: <new-submission>
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
include/linux/perf_event.h | 2 +
kernel/perf_event.c | 150 ++++++++++++++++++++++++++-----------------
2 files changed, 93 insertions(+), 59 deletions(-)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index cbf04cc..adf6d99 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -758,6 +758,8 @@ struct perf_event {
u64 shadow_ctx_time;
struct perf_event_attr attr;
+ u16 header_size;
+ u16 read_size;
struct hw_perf_event hw;
struct perf_event_context *ctx;
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index af1e63f..aede712 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -312,9 +312,75 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
ctx->nr_stat++;
}
+/*
+ * Called at perf_event creation and when events are attached/detached from a
+ * group.
+ */
+static void perf_event__read_size(struct perf_event *event)
+{
+ int entry = sizeof(u64); /* value */
+ int size = 0;
+ int nr = 1;
+
+ if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ size += sizeof(u64);
+
+ if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ size += sizeof(u64);
+
+ if (event->attr.read_format & PERF_FORMAT_ID)
+ entry += sizeof(u64);
+
+ if (event->attr.read_format & PERF_FORMAT_GROUP) {
+ nr += event->group_leader->nr_siblings;
+ size += sizeof(u64);
+ }
+
+ size += entry * nr;
+ event->read_size = size;
+}
+
+static void perf_event__header_size(struct perf_event *event)
+{
+ struct perf_sample_data *data;
+ u64 sample_type = event->attr.sample_type;
+ u16 size = 0;
+
+ perf_event__read_size(event);
+
+ if (sample_type & PERF_SAMPLE_IP)
+ size += sizeof(data->ip);
+
+ if (sample_type & PERF_SAMPLE_TID)
+ size += sizeof(data->tid_entry);
+
+ if (sample_type & PERF_SAMPLE_TIME)
+ size += sizeof(data->time);
+
+ if (sample_type & PERF_SAMPLE_ADDR)
+ size += sizeof(data->addr);
+
+ if (sample_type & PERF_SAMPLE_ID)
+ size += sizeof(data->id);
+
+ if (sample_type & PERF_SAMPLE_STREAM_ID)
+ size += sizeof(data->stream_id);
+
+ if (sample_type & PERF_SAMPLE_CPU)
+ size += sizeof(data->cpu_entry);
+
+ if (sample_type & PERF_SAMPLE_PERIOD)
+ size += sizeof(data->period);
+
+ if (sample_type & PERF_SAMPLE_READ)
+ size += event->read_size;
+
+ event->header_size = size;
+}
+
static void perf_group_attach(struct perf_event *event)
{
- struct perf_event *group_leader = event->group_leader;
+ struct perf_event *group_leader = event->group_leader, *pos;
/*
* We can have double attach due to group movement in perf_event_open.
@@ -333,6 +399,11 @@ static void perf_group_attach(struct perf_event *event)
list_add_tail(&event->group_entry, &group_leader->sibling_list);
group_leader->nr_siblings++;
+
+ perf_event__header_size(group_leader);
+
+ list_for_each_entry(pos, &group_leader->sibling_list, group_entry)
+ perf_event__header_size(pos);
}
/*
@@ -391,7 +462,7 @@ static void perf_group_detach(struct perf_event *event)
if (event->group_leader != event) {
list_del_init(&event->group_entry);
event->group_leader->nr_siblings--;
- return;
+ goto out;
}
if (!list_empty(&event->group_entry))
@@ -410,6 +481,12 @@ static void perf_group_detach(struct perf_event *event)
/* Inherit group flags from the previous leader */
sibling->group_flags = event->group_flags;
}
+
+out:
+ perf_event__header_size(event->group_leader);
+
+ list_for_each_entry(tmp, &event->group_leader->sibling_list, group_entry)
+ perf_event__header_size(tmp);
}
static inline int
@@ -2289,31 +2366,6 @@ static int perf_release(struct inode *inode, struct file *file)
return perf_event_release_kernel(event);
}
-static int perf_event_read_size(struct perf_event *event)
-{
- int entry = sizeof(u64); /* value */
- int size = 0;
- int nr = 1;
-
- if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
- size += sizeof(u64);
-
- if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
- size += sizeof(u64);
-
- if (event->attr.read_format & PERF_FORMAT_ID)
- entry += sizeof(u64);
-
- if (event->attr.read_format & PERF_FORMAT_GROUP) {
- nr += event->group_leader->nr_siblings;
- size += sizeof(u64);
- }
-
- size += entry * nr;
-
- return size;
-}
-
u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
{
struct perf_event *child;
@@ -2428,7 +2480,7 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count)
if (event->state == PERF_EVENT_STATE_ERROR)
return 0;
- if (count < perf_event_read_size(event))
+ if (count < event->read_size)
return -ENOSPC;
WARN_ON_ONCE(event->ctx->parent_ctx);
@@ -3606,59 +3658,34 @@ void perf_prepare_sample(struct perf_event_header *header,
data->type = sample_type;
header->type = PERF_RECORD_SAMPLE;
- header->size = sizeof(*header);
+ header->size = sizeof(*header) + event->header_size;
header->misc = 0;
header->misc |= perf_misc_flags(regs);
- if (sample_type & PERF_SAMPLE_IP) {
+ if (sample_type & PERF_SAMPLE_IP)
data->ip = perf_instruction_pointer(regs);
- header->size += sizeof(data->ip);
- }
-
if (sample_type & PERF_SAMPLE_TID) {
/* namespace issues */
data->tid_entry.pid = perf_event_pid(event, current);
data->tid_entry.tid = perf_event_tid(event, current);
-
- header->size += sizeof(data->tid_entry);
}
- if (sample_type & PERF_SAMPLE_TIME) {
+ if (sample_type & PERF_SAMPLE_TIME)
data->time = perf_clock();
- header->size += sizeof(data->time);
- }
-
- if (sample_type & PERF_SAMPLE_ADDR)
- header->size += sizeof(data->addr);
-
- if (sample_type & PERF_SAMPLE_ID) {
+ if (sample_type & PERF_SAMPLE_ID)
data->id = primary_event_id(event);
- header->size += sizeof(data->id);
- }
-
- if (sample_type & PERF_SAMPLE_STREAM_ID) {
+ if (sample_type & PERF_SAMPLE_STREAM_ID)
data->stream_id = event->id;
- header->size += sizeof(data->stream_id);
- }
-
if (sample_type & PERF_SAMPLE_CPU) {
data->cpu_entry.cpu = raw_smp_processor_id();
data->cpu_entry.reserved = 0;
-
- header->size += sizeof(data->cpu_entry);
}
- if (sample_type & PERF_SAMPLE_PERIOD)
- header->size += sizeof(data->period);
-
- if (sample_type & PERF_SAMPLE_READ)
- header->size += perf_event_read_size(event);
-
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
int size = 1;
@@ -3726,7 +3753,7 @@ perf_event_read_event(struct perf_event *event,
.header = {
.type = PERF_RECORD_READ,
.misc = 0,
- .size = sizeof(read_event) + perf_event_read_size(event),
+ .size = sizeof(read_event) + event->read_size,
},
.pid = perf_event_pid(event, task),
.tid = perf_event_tid(event, task),
@@ -5715,6 +5742,11 @@ SYSCALL_DEFINE5(perf_event_open,
mutex_unlock(&current->perf_event_mutex);
/*
+ * Precalculate sample_data sizes
+ */
+ perf_event__header_size(event);
+
+ /*
* Drop the reference on the group_event after placing the
* new event on the sibling_list. This ensures destruction
* of the group leader will find the pointer to itself in
--
1.6.2.5
From: Thomas Gleixner <[email protected]>
The ordered sample code allocates individual reference objects of type struct
sample_queue, which are 48 bytes on 64-bit and 20 bytes on 32-bit. That's
silly. Allocate ~64k sized chunks and hand the objects out from there.
Performance gain: ~ 15%
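Back-of-the-envelope capacity, using the object sizes quoted above:

	/*
	 * 64-bit: 64 * 1024 / 48 = 1365 sample_queue objects per malloc
	 * 32-bit: 64 * 1024 / 20 = 3276 sample_queue objects per malloc
	 *
	 * One allocation now serves over a thousand queued samples
	 * instead of a single one.
	 */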
Cc: Ingo Molnar <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/util/session.c | 21 ++++++++++++++++-----
tools/perf/util/session.h | 3 +++
2 files changed, 19 insertions(+), 5 deletions(-)
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 9fef587..52672da 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -114,6 +114,7 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc
self->repipe = repipe;
INIT_LIST_HEAD(&self->ordered_samples.samples);
INIT_LIST_HEAD(&self->ordered_samples.sample_cache);
+ INIT_LIST_HEAD(&self->ordered_samples.to_free);
machine__init(&self->host_machine, "", HOST_KERNEL_ID);
if (mode == O_RDONLY) {
@@ -403,10 +404,10 @@ static void perf_session_free_sample_buffers(struct perf_session *session)
{
struct ordered_samples *os = &session->ordered_samples;
- while (!list_empty(&os->sample_cache)) {
+ while (!list_empty(&os->to_free)) {
struct sample_queue *sq;
- sq = list_entry(os->sample_cache.next, struct sample_queue, list);
+ sq = list_entry(os->to_free.next, struct sample_queue, list);
list_del(&sq->list);
free(sq);
}
@@ -538,10 +539,13 @@ static void __queue_sample_event(struct sample_queue *new,
}
}
+#define MAX_SAMPLE_BUFFER (64 * 1024 / sizeof(struct sample_queue))
+
static int queue_sample_event(event_t *event, struct sample_data *data,
struct perf_session *s)
{
- struct list_head *sc = &s->ordered_samples.sample_cache;
+ struct ordered_samples *os = &s->ordered_samples;
+ struct list_head *sc = &os->sample_cache;
u64 timestamp = data->time;
struct sample_queue *new;
@@ -553,10 +557,17 @@ static int queue_sample_event(event_t *event, struct sample_data *data,
if (!list_empty(sc)) {
new = list_entry(sc->next, struct sample_queue, list);
list_del(&new->list);
+ } else if (os->sample_buffer) {
+ new = os->sample_buffer + os->sample_buffer_idx;
+ if (++os->sample_buffer_idx == MAX_SAMPLE_BUFFER)
+ os->sample_buffer = NULL;
} else {
- new = malloc(sizeof(*new));
- if (!new)
+ os->sample_buffer = malloc(MAX_SAMPLE_BUFFER * sizeof(*new));
+ if (!os->sample_buffer)
return -ENOMEM;
+ list_add(&os->sample_buffer->list, &os->to_free);
+ os->sample_buffer_idx = 2;
+ new = os->sample_buffer + 1;
}
new->timestamp = timestamp;
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index e4a7ff2..5bf6efa 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -19,7 +19,10 @@ struct ordered_samples {
u64 max_timestamp;
struct list_head samples;
struct list_head sample_cache;
+ struct list_head to_free;
+ struct sample_queue *sample_buffer;
struct sample_queue *last_sample;
+ int sample_buffer_idx;
};
struct perf_session {
--
1.6.2.5
From: Arnaldo Carvalho de Melo <[email protected]>
No need to check so many times whether dump_trace is on.
Cc: Frédéric Weisbecker <[email protected]>
Cc: Mike Galbraith <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Stephane Eranian <[email protected]>
LKML-Reference: <new-submission>
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/util/debug.c | 41 +++++++++++++----------------------------
1 files changed, 13 insertions(+), 28 deletions(-)
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index e5161e8..01bbe8e 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -57,21 +57,6 @@ void ui__warning(const char *format, ...)
}
#endif
-static int dump_printf_color(const char *fmt, const char *color, ...)
-{
- va_list args;
- int ret = 0;
-
- if (dump_trace) {
- va_start(args, color);
- ret = color_vfprintf(stdout, color, fmt, args);
- va_end(args);
- }
-
- return ret;
-}
-
-
void trace_event(event_t *event)
{
unsigned char *raw_event = (void *)event;
@@ -81,29 +66,29 @@ void trace_event(event_t *event)
if (!dump_trace)
return;
- dump_printf(".");
- dump_printf_color("\n. ... raw event: size %d bytes\n", color,
- event->header.size);
+ printf(".");
+ color_fprintf(stdout, color, "\n. ... raw event: size %d bytes\n",
+ event->header.size);
for (i = 0; i < event->header.size; i++) {
if ((i & 15) == 0) {
- dump_printf(".");
- dump_printf_color(" %04x: ", color, i);
+ printf(".");
+ color_fprintf(stdout, color, " %04x: ", i);
}
- dump_printf_color(" %02x", color, raw_event[i]);
+ color_fprintf(stdout, color, " %02x", raw_event[i]);
if (((i & 15) == 15) || i == event->header.size-1) {
- dump_printf_color(" ", color);
+ color_fprintf(stdout, color, " ");
for (j = 0; j < 15-(i & 15); j++)
- dump_printf_color(" ", color);
+ color_fprintf(stdout, color, " ");
for (j = i & ~15; j <= i; j++) {
- dump_printf_color("%c", color,
- isprint(raw_event[j]) ?
- raw_event[j] : '.');
+ color_fprintf(stdout, color, "%c",
+ isprint(raw_event[j]) ?
+ raw_event[j] : '.');
}
- dump_printf_color("\n", color);
+ color_fprintf(stdout, color, "\n");
}
}
- dump_printf(".\n");
+ printf(".\n");
}
--
1.6.2.5
From: Thomas Gleixner <[email protected]>
On 64-bit we can map the whole file in one go; on 32-bit we can at least map
32MB at a time instead of mapping/unmapping tiny chunks of the file.
Base the progress bar on 1/16 of the data size.
Preparatory patch to get rid of the malloc/memcpy/free of trace data.
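Illustrative numbers (assuming 4KB pages; not part of the patch):

	/*
	 * Old: mmap_window = 32 meant page_size * 32 = 128KB mmap slices.
	 * New: 32-bit uses 32MB slices; 64-bit sets mmap_window to
	 *      ULLONG_MAX and maps the file in one go (clamped to
	 *      file_size before the mmap call).
	 *
	 * Progress: progress_next = file_size / 16, so a 1GB perf.data
	 * updates the bar every 64MB of consumed data rather than on
	 * every mmap boundary - which on 64-bit would otherwise fire
	 * only once for the whole file.
	 */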
Cc: Ingo Molnar <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/util/session.c | 41 +++++++++++++++++++++++++++++------------
1 files changed, 29 insertions(+), 12 deletions(-)
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 9c806ab..752577f 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -101,7 +101,15 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc
INIT_LIST_HEAD(&self->dead_threads);
self->hists_tree = RB_ROOT;
self->last_match = NULL;
- self->mmap_window = 32;
+ /*
+ * On 64bit we can mmap the data file in one go. No need for tiny mmap
+ * slices. On 32bit we use 32MB.
+ */
+#if BITS_PER_LONG == 64
+ self->mmap_window = ULLONG_MAX;
+#else
+ self->mmap_window = 32 * 1024 * 1024ULL;
+#endif
self->machines = RB_ROOT;
self->repipe = repipe;
INIT_LIST_HEAD(&self->ordered_samples.samples);
@@ -738,18 +746,14 @@ int __perf_session__process_events(struct perf_session *session,
u64 data_offset, u64 data_size,
u64 file_size, struct perf_event_ops *ops)
{
- u64 head, page_offset, file_offset, file_pos;
+ u64 head, page_offset, file_offset, file_pos, progress_next;
int err, mmap_prot, mmap_flags;
struct ui_progress *progress;
- size_t page_size;
+ size_t page_size, mmap_size;
event_t *event;
uint32_t size;
char *buf;
- progress = ui_progress__new("Processing events...", session->size);
- if (progress == NULL)
- return -1;
-
perf_event_ops__fill_defaults(ops);
page_size = sysconf(_SC_PAGESIZE);
@@ -761,6 +765,15 @@ int __perf_session__process_events(struct perf_session *session,
if (data_offset + data_size < file_size)
file_size = data_offset + data_size;
+ progress_next = file_size / 16;
+ progress = ui_progress__new("Processing events...", file_size);
+ if (progress == NULL)
+ return -1;
+
+ mmap_size = session->mmap_window;
+ if (mmap_size > file_size)
+ mmap_size = file_size;
+
mmap_prot = PROT_READ;
mmap_flags = MAP_SHARED;
@@ -769,15 +782,14 @@ int __perf_session__process_events(struct perf_session *session,
mmap_flags = MAP_PRIVATE;
}
remap:
- buf = mmap(NULL, page_size * session->mmap_window, mmap_prot,
- mmap_flags, session->fd, file_offset);
+ buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, session->fd,
+ file_offset);
if (buf == MAP_FAILED) {
pr_err("failed to mmap file\n");
err = -errno;
goto out_err;
}
file_pos = file_offset + head;
- ui_progress__update(progress, file_offset);
more:
event = (event_t *)(buf + head);
@@ -788,10 +800,10 @@ more:
if (size == 0)
size = 8;
- if (head + event->header.size >= page_size * session->mmap_window) {
+ if (head + event->header.size >= mmap_size) {
int munmap_ret;
- munmap_ret = munmap(buf, page_size * session->mmap_window);
+ munmap_ret = munmap(buf, mmap_size);
assert(munmap_ret == 0);
page_offset = page_size * (head / page_size);
@@ -823,6 +835,11 @@ more:
head += size;
file_pos += size;
+ if (file_pos >= progress_next) {
+ progress_next += file_size / 16;
+ ui_progress__update(progress, file_pos);
+ }
+
if (file_pos < file_size)
goto more;
--
1.6.2.5
From: Thomas Gleixner <[email protected]>
Profiling perf with perf revealed that a large part of the processing time is
spent in malloc/memcpy/free in the sample ordering code. That code copies the
data from the mmap into malloc'ed memory. That's silly. We can keep the mmap
and just store the pointer in the queuing data structure. For 64 bit this is
not a problem as we map the whole file anyway. On 32bit we keep 8 maps around
and unmap the oldest before mmapping the next chunk of the file.
Performance gain: 2.95s -> 1.23s (factor 2.4)
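A minimal sketch of the 32-bit recycling, assuming the 8-entry map array from
the patch and 32MB slices:

	char *mmaps[8];			/* ring of live mappings */
	int map_idx = 0;

	/* before reusing a slot, tear down the oldest mapping */
	if (mmaps[map_idx]) {
		munmap(mmaps[map_idx], mmap_size);
		mmaps[map_idx] = NULL;
	}
	mmaps[map_idx] = buf;		/* remember the new mapping */
	map_idx = (map_idx + 1) & 7;	/* power-of-two ring, cheap wrap */

So up to 8 * 32MB = 256MB of the file stays mapped at once, and queued events
can keep pointing straight into the maps.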
Cc: Ingo Molnar <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/util/session.c | 27 +++++++++++----------------
1 files changed, 11 insertions(+), 16 deletions(-)
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 752577f..c989583 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -418,7 +418,6 @@ static void flush_sample_queue(struct perf_session *s,
os->last_flush = iter->timestamp;
list_del(&iter->list);
- free(iter->event);
free(iter);
}
@@ -531,7 +530,6 @@ static int queue_sample_event(event_t *event, struct sample_data *data,
u64 timestamp = data->time;
struct sample_queue *new;
-
if (timestamp < s->ordered_samples.last_flush) {
printf("Warning: Timestamp below last timeslice flush\n");
return -EINVAL;
@@ -542,14 +540,7 @@ static int queue_sample_event(event_t *event, struct sample_data *data,
return -ENOMEM;
new->timestamp = timestamp;
-
- new->event = malloc(event->header.size);
- if (!new->event) {
- free(new);
- return -ENOMEM;
- }
-
- memcpy(new->event, event, event->header.size);
+ new->event = event;
__queue_sample_event(new, s);
@@ -747,12 +738,12 @@ int __perf_session__process_events(struct perf_session *session,
u64 file_size, struct perf_event_ops *ops)
{
u64 head, page_offset, file_offset, file_pos, progress_next;
- int err, mmap_prot, mmap_flags;
+ int err, mmap_prot, mmap_flags, map_idx = 0;
struct ui_progress *progress;
size_t page_size, mmap_size;
+ char *buf, *mmaps[8];
event_t *event;
uint32_t size;
- char *buf;
perf_event_ops__fill_defaults(ops);
@@ -774,6 +765,8 @@ int __perf_session__process_events(struct perf_session *session,
if (mmap_size > file_size)
mmap_size = file_size;
+ memset(mmaps, 0, sizeof(mmaps));
+
mmap_prot = PROT_READ;
mmap_flags = MAP_SHARED;
@@ -789,6 +782,8 @@ remap:
err = -errno;
goto out_err;
}
+ mmaps[map_idx] = buf;
+ map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1);
file_pos = file_offset + head;
more:
@@ -801,10 +796,10 @@ more:
size = 8;
if (head + event->header.size >= mmap_size) {
- int munmap_ret;
-
- munmap_ret = munmap(buf, mmap_size);
- assert(munmap_ret == 0);
+ if (mmaps[map_idx]) {
+ munmap(mmaps[map_idx], mmap_size);
+ mmaps[map_idx] = NULL;
+ }
page_offset = page_size * (head / page_size);
file_offset += page_offset;
--
1.6.2.5
From: Thomas Gleixner <[email protected]>
Replace the pseudo-C++ self argument with session and give the mmap-related
variables sensible names. shift is a complete misnomer - it took me several
rounds of cursing to figure out that it's not a shift value.
Cc: Ingo Molnar <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/util/session.c | 77 +++++++++++++++++++++-----------------------
1 files changed, 37 insertions(+), 40 deletions(-)
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 525bcf6..2fdbccf 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -567,13 +567,13 @@ static int perf_session__process_sample(event_t *event, struct perf_session *s,
static int perf_session__process_event(struct perf_session *self,
event_t *event,
struct perf_event_ops *ops,
- u64 offset, u64 head)
+ u64 file_offset)
{
trace_event(event);
if (event->header.type < PERF_RECORD_HEADER_MAX) {
dump_printf("%#Lx [%#x]: PERF_RECORD_%s",
- offset + head, event->header.size,
+ file_offset, event->header.size,
event__name[event->header.type]);
hists__inc_nr_events(&self->hists, event->header.type);
}
@@ -606,7 +606,7 @@ static int perf_session__process_event(struct perf_session *self,
return ops->event_type(event, self);
case PERF_RECORD_HEADER_TRACING_DATA:
/* setup for reading amidst mmap */
- lseek(self->fd, offset + head, SEEK_SET);
+ lseek(self->fd, file_offset, SEEK_SET);
return ops->tracing_data(event, self);
case PERF_RECORD_HEADER_BUILD_ID:
return ops->build_id(event, self);
@@ -705,8 +705,7 @@ more:
}
if (size == 0 ||
- (skip = perf_session__process_event(self, &event, ops,
- 0, head)) < 0) {
+ (skip = perf_session__process_event(self, &event, ops, head)) < 0) {
dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
head, event.header.size, event.header.type);
/*
@@ -735,19 +734,19 @@ out_err:
return err;
}
-int __perf_session__process_events(struct perf_session *self,
+int __perf_session__process_events(struct perf_session *session,
u64 data_offset, u64 data_size,
u64 file_size, struct perf_event_ops *ops)
{
+ u64 head, page_offset, file_offset;
int err, mmap_prot, mmap_flags;
- u64 head, shift;
- u64 offset = 0;
+ struct ui_progress *progress;
size_t page_size;
event_t *event;
uint32_t size;
char *buf;
- struct ui_progress *progress = ui_progress__new("Processing events...",
- self->size);
+
+ progress = ui_progress__new("Processing events...", session->size);
if (progress == NULL)
return -1;
@@ -755,21 +754,20 @@ int __perf_session__process_events(struct perf_session *self,
page_size = sysconf(_SC_PAGESIZE);
- head = data_offset;
- shift = page_size * (head / page_size);
- offset += shift;
- head -= shift;
+ page_offset = page_size * (data_offset / page_size);
+ file_offset = page_offset;
+ head = data_offset - page_offset;
mmap_prot = PROT_READ;
mmap_flags = MAP_SHARED;
- if (self->header.needs_swap) {
+ if (session->header.needs_swap) {
mmap_prot |= PROT_WRITE;
mmap_flags = MAP_PRIVATE;
}
remap:
- buf = mmap(NULL, page_size * self->mmap_window, mmap_prot,
- mmap_flags, self->fd, offset);
+ buf = mmap(NULL, page_size * session->mmap_window, mmap_prot,
+ mmap_flags, session->fd, file_offset);
if (buf == MAP_FAILED) {
pr_err("failed to mmap file\n");
err = -errno;
@@ -778,36 +776,35 @@ remap:
more:
event = (event_t *)(buf + head);
- ui_progress__update(progress, offset);
+ ui_progress__update(progress, file_offset);
- if (self->header.needs_swap)
+ if (session->header.needs_swap)
perf_event_header__bswap(&event->header);
size = event->header.size;
if (size == 0)
size = 8;
- if (head + event->header.size >= page_size * self->mmap_window) {
+ if (head + event->header.size >= page_size * session->mmap_window) {
int munmap_ret;
- shift = page_size * (head / page_size);
-
- munmap_ret = munmap(buf, page_size * self->mmap_window);
+ munmap_ret = munmap(buf, page_size * session->mmap_window);
assert(munmap_ret == 0);
- offset += shift;
- head -= shift;
+ page_offset = page_size * (head / page_size);
+ file_offset += page_offset;
+ head -= page_offset;
goto remap;
}
size = event->header.size;
dump_printf("\n%#Lx [%#x]: event: %d\n",
- offset + head, event->header.size, event->header.type);
+ file_offset + head, event->header.size, event->header.type);
- if (size == 0 ||
- perf_session__process_event(self, event, ops, offset, head) < 0) {
+ if (size == 0 || perf_session__process_event(session, event, ops,
+ file_offset + head) < 0) {
dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
- offset + head, event->header.size,
+ file_offset + head, event->header.size,
event->header.type);
/*
* assume we lost track of the stream, check alignment, and
@@ -821,36 +818,36 @@ more:
head += size;
- if (offset + head >= data_offset + data_size)
+ if (file_offset + head >= data_offset + data_size)
goto done;
- if (offset + head < file_size)
+ if (file_offset + head < file_size)
goto more;
done:
err = 0;
/* do the final flush for ordered samples */
- self->ordered_samples.next_flush = ULLONG_MAX;
- flush_sample_queue(self, ops);
+ session->ordered_samples.next_flush = ULLONG_MAX;
+ flush_sample_queue(session, ops);
out_err:
ui_progress__delete(progress);
if (ops->lost == event__process_lost &&
- self->hists.stats.total_lost != 0) {
+ session->hists.stats.total_lost != 0) {
ui__warning("Processed %Lu events and LOST %Lu!\n\n"
"Check IO/CPU overload!\n\n",
- self->hists.stats.total_period,
- self->hists.stats.total_lost);
+ session->hists.stats.total_period,
+ session->hists.stats.total_lost);
}
-
- if (self->hists.stats.nr_unknown_events != 0) {
+
+ if (session->hists.stats.nr_unknown_events != 0) {
ui__warning("Found %u unknown events!\n\n"
"Is this an older tool processing a perf.data "
"file generated by a more recent tool?\n\n"
"If that is not the case, consider "
"reporting to [email protected].\n\n",
- self->hists.stats.nr_unknown_events);
+ session->hists.stats.nr_unknown_events);
}
-
+
return err;
}
--
1.6.2.5
From: Thomas Gleixner <[email protected]>
When the sample queue is flushed we free the sample reference objects, only to
malloc new ones as processing continues. Stop the malloc/free orgy and cache
the already allocated objects for reuse. Only allocate when the cache is
empty.
Performance gain: ~ 10%
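Reduced to its core, the pattern is (fragments mirroring the patch, using the
list.h helpers tools/perf already carries):

	/* flush path: recycle instead of free(iter) */
	list_del(&iter->list);
	list_add(&iter->list, &os->sample_cache);

	/* allocation path: pop a cached object before calling malloc */
	if (!list_empty(sc)) {
		new = list_entry(sc->next, struct sample_queue, list);
		list_del(&new->list);
	} else {
		new = malloc(sizeof(*new));
		if (!new)
			return -ENOMEM;
	}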
Cc: Ingo Molnar <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/util/session.c | 30 ++++++++++++++++++++++++++----
tools/perf/util/session.h | 1 +
2 files changed, 27 insertions(+), 4 deletions(-)
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index c989583..9fef587 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -113,6 +113,7 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc
self->machines = RB_ROOT;
self->repipe = repipe;
INIT_LIST_HEAD(&self->ordered_samples.samples);
+ INIT_LIST_HEAD(&self->ordered_samples.sample_cache);
machine__init(&self->host_machine, "", HOST_KERNEL_ID);
if (mode == O_RDONLY) {
@@ -398,6 +399,19 @@ struct sample_queue {
struct list_head list;
};
+static void perf_session_free_sample_buffers(struct perf_session *session)
+{
+ struct ordered_samples *os = &session->ordered_samples;
+
+ while (!list_empty(&os->sample_cache)) {
+ struct sample_queue *sq;
+
+ sq = list_entry(os->sample_cache.next, struct sample_queue, list);
+ list_del(&sq->list);
+ free(sq);
+ }
+}
+
static void flush_sample_queue(struct perf_session *s,
struct perf_event_ops *ops)
{
@@ -418,7 +432,7 @@ static void flush_sample_queue(struct perf_session *s,
os->last_flush = iter->timestamp;
list_del(&iter->list);
- free(iter);
+ list_add(&iter->list, &os->sample_cache);
}
if (list_empty(head)) {
@@ -527,6 +541,7 @@ static void __queue_sample_event(struct sample_queue *new,
static int queue_sample_event(event_t *event, struct sample_data *data,
struct perf_session *s)
{
+ struct list_head *sc = &s->ordered_samples.sample_cache;
u64 timestamp = data->time;
struct sample_queue *new;
@@ -535,9 +550,14 @@ static int queue_sample_event(event_t *event, struct sample_data *data,
return -EINVAL;
}
- new = malloc(sizeof(*new));
- if (!new)
- return -ENOMEM;
+ if (!list_empty(sc)) {
+ new = list_entry(sc->next, struct sample_queue, list);
+ list_del(&new->list);
+ } else {
+ new = malloc(sizeof(*new));
+ if (!new)
+ return -ENOMEM;
+ }
new->timestamp = timestamp;
new->event = event;
@@ -730,6 +750,7 @@ more:
done:
err = 0;
out_err:
+ perf_session_free_sample_buffers(self);
return err;
}
@@ -862,6 +883,7 @@ out_err:
session->hists.stats.nr_unknown_events);
}
+ perf_session_free_sample_buffers(session);
return err;
}
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index a00f32e..e4a7ff2 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -18,6 +18,7 @@ struct ordered_samples {
u64 next_flush;
u64 max_timestamp;
struct list_head samples;
+ struct list_head sample_cache;
struct sample_queue *last_sample;
};
--
1.6.2.5
From: Thomas Gleixner <[email protected]>
The progress bar only needs to change when the file offset changes, which
happens only when the next mmap is done. No need to call ui_progress__update()
for every event.
Cc: Ingo Molnar <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/util/session.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 2fdbccf..3c140da 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -773,10 +773,10 @@ remap:
err = -errno;
goto out_err;
}
+ ui_progress__update(progress, file_offset);
more:
event = (event_t *)(buf + head);
- ui_progress__update(progress, file_offset);
if (session->header.needs_swap)
perf_event_header__bswap(&event->header);
--
1.6.2.5
From: Thomas Gleixner <[email protected]>
No need to check twice.
Cc: Ingo Molnar <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/util/session.c | 20 +++++++++++---------
1 files changed, 11 insertions(+), 9 deletions(-)
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 3c140da..9c806ab 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -738,7 +738,7 @@ int __perf_session__process_events(struct perf_session *session,
u64 data_offset, u64 data_size,
u64 file_size, struct perf_event_ops *ops)
{
- u64 head, page_offset, file_offset;
+ u64 head, page_offset, file_offset, file_pos;
int err, mmap_prot, mmap_flags;
struct ui_progress *progress;
size_t page_size;
@@ -758,6 +758,9 @@ int __perf_session__process_events(struct perf_session *session,
file_offset = page_offset;
head = data_offset - page_offset;
+ if (data_offset + data_size < file_size)
+ file_size = data_offset + data_size;
+
mmap_prot = PROT_READ;
mmap_flags = MAP_SHARED;
@@ -773,6 +776,7 @@ remap:
err = -errno;
goto out_err;
}
+ file_pos = file_offset + head;
ui_progress__update(progress, file_offset);
more:
@@ -799,10 +803,10 @@ more:
size = event->header.size;
dump_printf("\n%#Lx [%#x]: event: %d\n",
- file_offset + head, event->header.size, event->header.type);
+ file_pos, event->header.size, event->header.type);
- if (size == 0 || perf_session__process_event(session, event, ops,
- file_offset + head) < 0) {
+ if (size == 0 ||
+ perf_session__process_event(session, event, ops, file_pos) < 0) {
dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
file_offset + head, event->header.size,
event->header.type);
@@ -817,13 +821,11 @@ more:
}
head += size;
+ file_pos += size;
- if (file_offset + head >= data_offset + data_size)
- goto done;
-
- if (file_offset + head < file_size)
+ if (file_pos < file_size)
goto more;
-done:
+
err = 0;
/* do the final flush for ordered samples */
session->ordered_samples.next_flush = ULLONG_MAX;
--
1.6.2.5
From: Thomas Gleixner <[email protected]>
There is no reason to use a struct sample_event pointer in struct sample_queue
and type cast it when flushing the queue.
Cc: Ingo Molnar <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/util/session.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index daca557..525bcf6 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -386,7 +386,7 @@ static event__swap_op event__swap_ops[] = {
struct sample_queue {
u64 timestamp;
- struct sample_event *event;
+ event_t *event;
struct list_head list;
};
@@ -406,7 +406,7 @@ static void flush_sample_queue(struct perf_session *s,
if (iter->timestamp > limit)
break;
- ops->sample((event_t *)iter->event, s);
+ ops->sample(iter->event, s);
os->last_flush = iter->timestamp;
list_del(&iter->list);
--
1.6.2.5
* Arnaldo Carvalho de Melo <[email protected]> wrote:
> Hi Ingo,
>
> Please pull from:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux-2.6 perf/core
>
> Regards,
>
> - Arnaldo
>
> Arnaldo Carvalho de Melo (2):
> perf events: Precalculate the header space for PERF_SAMPLE_ fields
> perf debug: Simplify trace_event
>
> Thomas Gleixner (9):
> perf session: Fix list sort algorithm
> perf session: Use appropriate pointer type instead of silly typecasting
> perf session: Cleanup __perf_session__process_events()
> perf session: Move ui_progress_update in __perf_session__process_events()
> perf session: Simplify termination checks
> perf session: Use sensible mmap size
> perf session: Keep file mmaped instead of malloc/memcpy
> perf session: Cache sample objects
> perf session: Allocate chunks of sample objects
>
> include/linux/perf_event.h | 2 +
> kernel/perf_event.c | 150 ++++++++++++++---------
> tools/perf/util/debug.c | 41 ++-----
> tools/perf/util/session.c | 289 ++++++++++++++++++++++++--------------------
> tools/perf/util/session.h | 8 +-
> 5 files changed, 269 insertions(+), 221 deletions(-)
Pulled, thanks a lot guys!
I also merged the followup fixes.
Note: I also merged the perf trace -> perf script rename commit that we tested in
-tip for some time, into perf/core - please double check any 'perf script' fallout.
Ingo