2009-11-24 05:26:07

by Li Zefan

[permalink] [raw]
Subject: [PATCH 0/5] perf kmem: Add more functions and show more statistics

List of new things:

- Add option "--raw-ip", to print raw ip instead of symbols.

- Sort the output by fragmentation by default, and support
multi keys.

- Collect and show cross node allocation stats.

- Collect and show alloc/free ping-pong stats.

- Add help file.

---
tools/perf/Documentation/perf-kmem.txt | 44 ++++
tools/perf/builtin-kmem.c | 353 ++++++++++++++++++++++++++------
tools/perf/command-list.txt | 1 +
3 files changed, 331 insertions(+), 67 deletions(-)


Pekka, do you think we can remove kmemtrace now?

With kmem trace events, low-level analyzing can be done using
ftrace, and high-level analyzing can be done using perf-kmem.

And chance is, more people may use and improve perf-kmem, and it
will be well-maintained within the perf infrastructure. On the
other hand, I guess few people use and contribute to kmemtrace-user.

BTW, seems kmemtrace-user doesn't work with ftrace. I got a segfault:

# ./kmemtraced
Copying /proc/kallsyms...
Logging... Press Control-C to stop.
^CSegmentation fault


2009-11-24 05:26:23

by Li Zefan

[permalink] [raw]
Subject: [PATCH 1/5] perf kmem: Add new option to show raw ip

Add option "--raw-ip" to show raw ip instead of symbols:

# ./perf kmem --stat caller --raw-ip
------------------------------------------------------------------------------
Callsite |Total_alloc/Per | Total_req/Per | Hit | Frag
------------------------------------------------------------------------------
0xc05301aa | 733184/4096 | 733184/4096 | 179| 0.000%
0xc0542ba0 | 483328/4096 | 483328/4096 | 118| 0.000%
...

Also show symbols with format sym+offset instead of sym/offset.

Signed-off-by: Li Zefan <[email protected]>
---
tools/perf/builtin-kmem.c | 18 ++++++++++--------
1 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 256d18f..1ef43c2 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -32,15 +32,14 @@ sort_fn_t caller_sort_fn;
static int alloc_lines = -1;
static int caller_lines = -1;

+static bool raw_ip;
+
static char *cwd;
static int cwdlen;

struct alloc_stat {
union {
- struct {
- char *name;
- u64 call_site;
- };
+ u64 call_site;
u64 ptr;
};
u64 bytes_req;
@@ -323,12 +322,14 @@ static void __print_result(struct rb_root *root, int n_lines, int is_caller)

if (is_caller) {
addr = data->call_site;
- sym = kernel_maps__find_symbol(addr, NULL, NULL);
+ if (!raw_ip)
+ sym = kernel_maps__find_symbol(addr,
+ NULL, NULL);
} else
addr = data->ptr;

if (sym != NULL)
- snprintf(bf, sizeof(bf), "%s/%Lx", sym->name,
+ snprintf(bf, sizeof(bf), "%s+%Lx", sym->name,
addr - sym->start);
else
snprintf(bf, sizeof(bf), "%#Lx", addr);
@@ -345,9 +346,9 @@ static void __print_result(struct rb_root *root, int n_lines, int is_caller)
}

if (n_lines == -1)
- printf(" ... | ... | ... | ... | ... \n");
+ printf(" ... | ... | ... | ... | ... \n");

- printf(" ------------------------------------------------------------------------------\n");
+ printf("%.78s\n", graph_dotted_line);
}

static void print_summary(void)
@@ -558,6 +559,7 @@ static const struct option kmem_options[] = {
OPT_CALLBACK('l', "line", NULL, "num",
"show n lins",
parse_line_opt),
+ OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
OPT_END()
};

--
1.6.3

2009-11-24 05:26:45

by Li Zefan

[permalink] [raw]
Subject: [PATCH 2/5] perf kmem: Default to sort by fragmentation

Make the output sort by fragmentation by default.

Also make the usage of "--sort" option consistent with other perf
tools. That is, we support multi keys: "--sort key1[,key2]...".

# ./perf kmem --stat caller
------------------------------------------------------------------------------
Callsite |Total_alloc/Per | Total_req/Per | Hit | Frag
------------------------------------------------------------------------------
__netdev_alloc_skb+23 | 5048/1682 | 4564/1521 | 3| 9.588%
perf_event_alloc.clone.0+0 | 7504/682 | 7128/648 | 11| 5.011%
tracepoint_add_probe+32e | 157/31 | 154/30 | 5| 1.911%
alloc_buffer_head+16 | 456/57 | 448/56 | 8| 1.754%
radix_tree_preload+51 | 584/292 | 576/288 | 2| 1.370%
...

TODO:
- Extract duplicate code in builtin-kmem.c and builtin-sched.c
into util/sort.c.

Signed-off-by: Li Zefan <[email protected]>
---
tools/perf/builtin-kmem.c | 142 ++++++++++++++++++++++++++++++++++-----------
1 files changed, 108 insertions(+), 34 deletions(-)

diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 1ef43c2..dc86f1e 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -26,14 +26,13 @@ static u64 sample_type;
static int alloc_flag;
static int caller_flag;

-sort_fn_t alloc_sort_fn;
-sort_fn_t caller_sort_fn;
-
static int alloc_lines = -1;
static int caller_lines = -1;

static bool raw_ip;

+static char default_sort_order[] = "frag,hit,bytes";
+
static char *cwd;
static int cwdlen;

@@ -371,20 +370,34 @@ static void print_result(void)
print_summary();
}

+struct sort_dimension {
+ const char name[20];
+ sort_fn_t cmp;
+ struct list_head list;
+};
+
+static LIST_HEAD(caller_sort);
+static LIST_HEAD(alloc_sort);
+
static void sort_insert(struct rb_root *root, struct alloc_stat *data,
- sort_fn_t sort_fn)
+ struct list_head *sort_list)
{
struct rb_node **new = &(root->rb_node);
struct rb_node *parent = NULL;
+ struct sort_dimension *sort;

while (*new) {
struct alloc_stat *this;
- int cmp;
+ int cmp = 0;

this = rb_entry(*new, struct alloc_stat, node);
parent = *new;

- cmp = sort_fn(data, this);
+ list_for_each_entry(sort, sort_list, list) {
+ cmp = sort->cmp(data, this);
+ if (cmp)
+ break;
+ }

if (cmp > 0)
new = &((*new)->rb_left);
@@ -397,7 +410,7 @@ static void sort_insert(struct rb_root *root, struct alloc_stat *data,
}

static void __sort_result(struct rb_root *root, struct rb_root *root_sorted,
- sort_fn_t sort_fn)
+ struct list_head *sort_list)
{
struct rb_node *node;
struct alloc_stat *data;
@@ -409,14 +422,14 @@ static void __sort_result(struct rb_root *root, struct rb_root *root_sorted,

rb_erase(node, root);
data = rb_entry(node, struct alloc_stat, node);
- sort_insert(root_sorted, data, sort_fn);
+ sort_insert(root_sorted, data, sort_list);
}
}

static void sort_result(void)
{
- __sort_result(&root_alloc_stat, &root_alloc_sorted, alloc_sort_fn);
- __sort_result(&root_caller_stat, &root_caller_sorted, caller_sort_fn);
+ __sort_result(&root_alloc_stat, &root_alloc_sorted, &alloc_sort);
+ __sort_result(&root_caller_stat, &root_caller_sorted, &caller_sort);
}

static int __cmd_kmem(void)
@@ -434,7 +447,6 @@ static const char * const kmem_usage[] = {
NULL
};

-
static int ptr_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
if (l->ptr < r->ptr)
@@ -444,6 +456,11 @@ static int ptr_cmp(struct alloc_stat *l, struct alloc_stat *r)
return 0;
}

+static struct sort_dimension ptr_sort_dimension = {
+ .name = "ptr",
+ .cmp = ptr_cmp,
+};
+
static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
if (l->call_site < r->call_site)
@@ -453,6 +470,11 @@ static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r)
return 0;
}

+static struct sort_dimension callsite_sort_dimension = {
+ .name = "callsite",
+ .cmp = callsite_cmp,
+};
+
static int hit_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
if (l->hit < r->hit)
@@ -462,6 +484,11 @@ static int hit_cmp(struct alloc_stat *l, struct alloc_stat *r)
return 0;
}

+static struct sort_dimension hit_sort_dimension = {
+ .name = "hit",
+ .cmp = hit_cmp,
+};
+
static int bytes_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
if (l->bytes_alloc < r->bytes_alloc)
@@ -471,6 +498,11 @@ static int bytes_cmp(struct alloc_stat *l, struct alloc_stat *r)
return 0;
}

+static struct sort_dimension bytes_sort_dimension = {
+ .name = "bytes",
+ .cmp = bytes_cmp,
+};
+
static int frag_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
double x, y;
@@ -485,31 +517,73 @@ static int frag_cmp(struct alloc_stat *l, struct alloc_stat *r)
return 0;
}

+static struct sort_dimension frag_sort_dimension = {
+ .name = "frag",
+ .cmp = frag_cmp,
+};
+
+static struct sort_dimension *avail_sorts[] = {
+ &ptr_sort_dimension,
+ &callsite_sort_dimension,
+ &hit_sort_dimension,
+ &bytes_sort_dimension,
+ &frag_sort_dimension,
+};
+
+#define NUM_AVAIL_SORTS \
+ (int)(sizeof(avail_sorts) / sizeof(struct sort_dimension *))
+
+static int sort_dimension__add(const char *tok, struct list_head *list)
+{
+ struct sort_dimension *sort;
+ int i;
+
+ for (i = 0; i < NUM_AVAIL_SORTS; i++) {
+ if (!strcmp(avail_sorts[i]->name, tok)) {
+ sort = malloc(sizeof(*sort));
+ if (!sort)
+ die("malloc");
+ memcpy(sort, avail_sorts[i], sizeof(*sort));
+ list_add_tail(&sort->list, list);
+ return 0;
+ }
+ }
+
+ return -1;
+}
+
+static int setup_sorting(struct list_head *sort_list, const char *arg)
+{
+ char *tok;
+ char *str = strdup(arg);
+
+ if (!str)
+ die("strdup");
+
+ while (true) {
+ tok = strsep(&str, ",");
+ if (!tok)
+ break;
+ if (sort_dimension__add(tok, sort_list) < 0) {
+ error("Unknown --sort key: '%s'", tok);
+ return -1;
+ }
+ }
+
+ free(str);
+ return 0;
+}
+
static int parse_sort_opt(const struct option *opt __used,
const char *arg, int unset __used)
{
- sort_fn_t sort_fn;
-
if (!arg)
return -1;

- if (strcmp(arg, "ptr") == 0)
- sort_fn = ptr_cmp;
- else if (strcmp(arg, "call_site") == 0)
- sort_fn = callsite_cmp;
- else if (strcmp(arg, "hit") == 0)
- sort_fn = hit_cmp;
- else if (strcmp(arg, "bytes") == 0)
- sort_fn = bytes_cmp;
- else if (strcmp(arg, "frag") == 0)
- sort_fn = frag_cmp;
- else
- return -1;
-
if (caller_flag > alloc_flag)
- caller_sort_fn = sort_fn;
+ return setup_sorting(&caller_sort, arg);
else
- alloc_sort_fn = sort_fn;
+ return setup_sorting(&alloc_sort, arg);

return 0;
}
@@ -553,8 +627,8 @@ static const struct option kmem_options[] = {
OPT_CALLBACK(0, "stat", NULL, "<alloc>|<caller>",
"stat selector, Pass 'alloc' or 'caller'.",
parse_stat_opt),
- OPT_CALLBACK('s', "sort", NULL, "key",
- "sort by key: ptr, call_site, hit, bytes, frag",
+ OPT_CALLBACK('s', "sort", NULL, "key[,key2...]",
+ "sort by key(s): ptr, call_site, bytes, hit, frag",
parse_sort_opt),
OPT_CALLBACK('l', "line", NULL, "num",
"show n lins",
@@ -606,10 +680,10 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __used)
else if (argc)
usage_with_options(kmem_usage, kmem_options);

- if (!alloc_sort_fn)
- alloc_sort_fn = bytes_cmp;
- if (!caller_sort_fn)
- caller_sort_fn = bytes_cmp;
+ if (list_empty(&caller_sort))
+ setup_sorting(&caller_sort, default_sort_order);
+ if (list_empty(&alloc_sort))
+ setup_sorting(&alloc_sort, default_sort_order);

return __cmd_kmem();
}
--
1.6.3

2009-11-24 05:27:06

by Li Zefan

[permalink] [raw]
Subject: [PATCH 3/5] perf kmem: Collect cross node allocation statistics

Show cross node memory allocations:

# ./perf kmem

SUMMARY
=======
...
Cross node allocations: 0/3633

Signed-off-by: Li Zefan <[email protected]>
---
tools/perf/builtin-kmem.c | 81 +++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 79 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index dc86f1e..1ecf3f4 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -36,6 +36,9 @@ static char default_sort_order[] = "frag,hit,bytes";
static char *cwd;
static int cwdlen;

+static int *cpunode_map;
+static int max_cpu_num;
+
struct alloc_stat {
union {
u64 call_site;
@@ -54,12 +57,74 @@ static struct rb_root root_caller_stat;
static struct rb_root root_caller_sorted;

static unsigned long total_requested, total_allocated;
+static unsigned long nr_allocs, nr_cross_allocs;

struct raw_event_sample {
u32 size;
char data[0];
};

+#define PATH_SYS_NODE "/sys/devices/system/node"
+
+static void init_cpunode_map(void)
+{
+ FILE *fp;
+ int i;
+
+ fp = fopen("/sys/devices/system/cpu/kernel_max", "r");
+ if (!fp) {
+ max_cpu_num = 4096;
+ return;
+ }
+
+ if (fscanf(fp, "%d", &max_cpu_num) < 1)
+ die("Failed to read 'kernel_max' from sysfs");
+ max_cpu_num++;
+
+ cpunode_map = calloc(max_cpu_num, sizeof(int));
+ if (!cpunode_map)
+ die("calloc");
+ for (i = 0; i < max_cpu_num; i++)
+ cpunode_map[i] = -1;
+ fclose(fp);
+}
+
+static void setup_cpunode_map(void)
+{
+ struct dirent *dent1, *dent2;
+ DIR *dir1, *dir2;
+ unsigned int cpu, mem;
+ char buf[PATH_MAX];
+
+ init_cpunode_map();
+
+ dir1 = opendir(PATH_SYS_NODE);
+ if (!dir1)
+ return;
+
+ while (true) {
+ dent1 = readdir(dir1);
+ if (!dent1)
+ break;
+
+ if (sscanf(dent1->d_name, "node%u", &mem) < 1)
+ continue;
+
+ snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name);
+ dir2 = opendir(buf);
+ if (!dir2)
+ continue;
+ while (true) {
+ dent2 = readdir(dir2);
+ if (!dent2)
+ break;
+ if (sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
+ continue;
+ cpunode_map[cpu] = mem;
+ }
+ }
+}
+
static int
process_comm_event(event_t *event, unsigned long offset, unsigned long head)
{
@@ -157,15 +222,16 @@ static void insert_caller_stat(unsigned long call_site,

static void process_alloc_event(struct raw_event_sample *raw,
struct event *event,
- int cpu __used,
+ int cpu,
u64 timestamp __used,
struct thread *thread __used,
- int node __used)
+ int node)
{
unsigned long call_site;
unsigned long ptr;
int bytes_req;
int bytes_alloc;
+ int node1, node2;

ptr = raw_field_value(event, "ptr", raw->data);
call_site = raw_field_value(event, "call_site", raw->data);
@@ -177,6 +243,14 @@ static void process_alloc_event(struct raw_event_sample *raw,

total_requested += bytes_req;
total_allocated += bytes_alloc;
+
+ if (node) {
+ node1 = cpunode_map[cpu];
+ node2 = raw_field_value(event, "node", raw->data);
+ if (node1 != node2)
+ nr_cross_allocs++;
+ }
+ nr_allocs++;
}

static void process_free_event(struct raw_event_sample *raw __used,
@@ -359,6 +433,7 @@ static void print_summary(void)
total_allocated - total_requested);
printf("Internal fragmentation: %f%%\n",
fragmentation(total_requested, total_allocated));
+ printf("Cross CPU allocations: %lu/%lu\n", nr_cross_allocs, nr_allocs);
}

static void print_result(void)
@@ -685,6 +760,8 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __used)
if (list_empty(&alloc_sort))
setup_sorting(&alloc_sort, default_sort_order);

+ setup_cpunode_map();
+
return __cmd_kmem();
}

--
1.6.3

2009-11-24 05:27:29

by Li Zefan

[permalink] [raw]
Subject: [PATCH 4/5] perf kmem: Measure kmalloc/kfree CPU ping-pong call-sites

Show statistics for allocations and frees on different cpus:

------------------------------------------------------------------------------------------------------
Callsite | Total_alloc/Per | Total_req/Per | Hit | Ping-pong | Frag
------------------------------------------------------------------------------------------------------
perf_event_alloc.clone.0+0 | 7504/682 | 7128/648 | 11 | 0 | 5.011%
alloc_buffer_head+16 | 288/57 | 280/56 | 5 | 0 | 2.778%
radix_tree_preload+51 | 296/296 | 288/288 | 1 | 0 | 2.703%
tracepoint_add_probe+32e | 157/31 | 154/30 | 5 | 0 | 1.911%
do_maps_open+0 | 796/12 | 792/12 | 66 | 0 | 0.503%
sock_alloc_send_pskb+16e | 23780/495 | 23744/494 | 48 | 38 | 0.151%
anon_vma_prepare+9a | 3744/44 | 3740/44 | 85 | 0 | 0.107%
d_alloc+21 | 64948/164 | 64944/164 | 396 | 0 | 0.006%
proc_alloc_inode+23 | 262292/676 | 262288/676 | 388 | 0 | 0.002%
create_object+28 | 459600/200 | 459600/200 | 2298 | 71 | 0.000%
journal_start+67 | 14440/40 | 14440/40 | 361 | 0 | 0.000%
get_empty_filp+df | 53504/256 | 53504/256 | 209 | 0 | 0.000%
getname+2a | 823296/4096 | 823296/4096 | 201 | 0 | 0.000%
seq_read+2b0 | 544768/4096 | 544768/4096 | 133 | 0 | 0.000%
seq_open+6d | 17024/128 | 17024/128 | 133 | 0 | 0.000%
mmap_region+2e6 | 11704/88 | 11704/88 | 133 | 0 | 0.000%
single_open+0 | 1072/16 | 1072/16 | 67 | 0 | 0.000%
__alloc_skb+2e | 12544/256 | 12544/256 | 49 | 38 | 0.000%
__sigqueue_alloc+4a | 1296/144 | 1296/144 | 9 | 8 | 0.000%
tracepoint_add_probe+6f | 80/16 | 80/16 | 5 | 0 | 0.000%
------------------------------------------------------------------------------------------------------
...

Signed-off-by: Li Zefan <[email protected]>
---
tools/perf/builtin-kmem.c | 122 ++++++++++++++++++++++++++++++++++----------
1 files changed, 94 insertions(+), 28 deletions(-)

diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 1ecf3f4..173d6db 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -40,13 +40,14 @@ static int *cpunode_map;
static int max_cpu_num;

struct alloc_stat {
- union {
- u64 call_site;
- u64 ptr;
- };
+ u64 call_site;
+ u64 ptr;
u64 bytes_req;
u64 bytes_alloc;
u32 hit;
+ u32 pingpong;
+
+ short alloc_cpu;

struct rb_node node;
};
@@ -144,16 +145,13 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head)
return 0;
}

-static void insert_alloc_stat(unsigned long ptr,
- int bytes_req, int bytes_alloc)
+static void insert_alloc_stat(unsigned long call_site, unsigned long ptr,
+ int bytes_req, int bytes_alloc, int cpu)
{
struct rb_node **node = &root_alloc_stat.rb_node;
struct rb_node *parent = NULL;
struct alloc_stat *data = NULL;

- if (!alloc_flag)
- return;
-
while (*node) {
parent = *node;
data = rb_entry(*node, struct alloc_stat, node);
@@ -172,7 +170,10 @@ static void insert_alloc_stat(unsigned long ptr,
data->bytes_alloc += bytes_req;
} else {
data = malloc(sizeof(*data));
+ if (!data)
+ die("malloc");
data->ptr = ptr;
+ data->pingpong = 0;
data->hit = 1;
data->bytes_req = bytes_req;
data->bytes_alloc = bytes_alloc;
@@ -180,6 +181,8 @@ static void insert_alloc_stat(unsigned long ptr,
rb_link_node(&data->node, parent, node);
rb_insert_color(&data->node, &root_alloc_stat);
}
+ data->call_site = call_site;
+ data->alloc_cpu = cpu;
}

static void insert_caller_stat(unsigned long call_site,
@@ -189,9 +192,6 @@ static void insert_caller_stat(unsigned long call_site,
struct rb_node *parent = NULL;
struct alloc_stat *data = NULL;

- if (!caller_flag)
- return;
-
while (*node) {
parent = *node;
data = rb_entry(*node, struct alloc_stat, node);
@@ -210,7 +210,10 @@ static void insert_caller_stat(unsigned long call_site,
data->bytes_alloc += bytes_req;
} else {
data = malloc(sizeof(*data));
+ if (!data)
+ die("malloc");
data->call_site = call_site;
+ data->pingpong = 0;
data->hit = 1;
data->bytes_req = bytes_req;
data->bytes_alloc = bytes_alloc;
@@ -238,7 +241,7 @@ static void process_alloc_event(struct raw_event_sample *raw,
bytes_req = raw_field_value(event, "bytes_req", raw->data);
bytes_alloc = raw_field_value(event, "bytes_alloc", raw->data);

- insert_alloc_stat(ptr, bytes_req, bytes_alloc);
+ insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, cpu);
insert_caller_stat(call_site, bytes_req, bytes_alloc);

total_requested += bytes_req;
@@ -253,12 +256,58 @@ static void process_alloc_event(struct raw_event_sample *raw,
nr_allocs++;
}

-static void process_free_event(struct raw_event_sample *raw __used,
- struct event *event __used,
- int cpu __used,
+static int ptr_cmp(struct alloc_stat *, struct alloc_stat *);
+static int callsite_cmp(struct alloc_stat *, struct alloc_stat *);
+
+static struct alloc_stat *search_alloc_stat(unsigned long ptr,
+ unsigned long call_site,
+ struct rb_root *root,
+ sort_fn_t sort_fn)
+{
+ struct rb_node *node = root->rb_node;
+ struct alloc_stat key = { .ptr = ptr, .call_site = call_site };
+
+ while (node) {
+ struct alloc_stat *data;
+ int cmp;
+
+ data = rb_entry(node, struct alloc_stat, node);
+
+ cmp = sort_fn(&key, data);
+ if (cmp < 0)
+ node = node->rb_left;
+ else if (cmp > 0)
+ node = node->rb_right;
+ else
+ return data;
+ }
+ return NULL;
+}
+
+static void process_free_event(struct raw_event_sample *raw,
+ struct event *event,
+ int cpu,
u64 timestamp __used,
struct thread *thread __used)
{
+ unsigned long ptr;
+ struct alloc_stat *s_alloc, *s_caller;
+
+ ptr = raw_field_value(event, "ptr", raw->data);
+
+ s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp);
+ if (!s_alloc)
+ return;
+
+ if (cpu != s_alloc->alloc_cpu) {
+ s_alloc->pingpong++;
+
+ s_caller = search_alloc_stat(0, s_alloc->call_site,
+ &root_caller_stat, callsite_cmp);
+ assert(s_caller);
+ s_caller->pingpong++;
+ }
+ s_alloc->alloc_cpu = -1;
}

static void
@@ -379,10 +428,10 @@ static void __print_result(struct rb_root *root, int n_lines, int is_caller)
{
struct rb_node *next;

- printf("%.78s\n", graph_dotted_line);
- printf("%-28s|", is_caller ? "Callsite": "Alloc Ptr");
- printf("Total_alloc/Per | Total_req/Per | Hit | Frag\n");
- printf("%.78s\n", graph_dotted_line);
+ printf("%.102s\n", graph_dotted_line);
+ printf(" %-34s |", is_caller ? "Callsite": "Alloc Ptr");
+ printf(" Total_alloc/Per | Total_req/Per | Hit | Ping-pong | Frag\n");
+ printf("%.102s\n", graph_dotted_line);

next = rb_first(root);

@@ -390,7 +439,7 @@ static void __print_result(struct rb_root *root, int n_lines, int is_caller)
struct alloc_stat *data = rb_entry(next, struct alloc_stat,
node);
struct symbol *sym = NULL;
- char bf[BUFSIZ];
+ char buf[BUFSIZ];
u64 addr;

if (is_caller) {
@@ -402,26 +451,28 @@ static void __print_result(struct rb_root *root, int n_lines, int is_caller)
addr = data->ptr;

if (sym != NULL)
- snprintf(bf, sizeof(bf), "%s+%Lx", sym->name,
+ snprintf(buf, sizeof(buf), "%s+%Lx", sym->name,
addr - sym->start);
else
- snprintf(bf, sizeof(bf), "%#Lx", addr);
+ snprintf(buf, sizeof(buf), "%#Lx", addr);
+ printf(" %-34s |", buf);

- printf("%-28s|%8llu/%-6lu |%8llu/%-6lu|%6lu|%8.3f%%\n",
- bf, (unsigned long long)data->bytes_alloc,
+ printf(" %9llu/%-5lu | %9llu/%-5lu | %6lu | %8lu | %6.3f%%\n",
+ (unsigned long long)data->bytes_alloc,
(unsigned long)data->bytes_alloc / data->hit,
(unsigned long long)data->bytes_req,
(unsigned long)data->bytes_req / data->hit,
(unsigned long)data->hit,
+ (unsigned long)data->pingpong,
fragmentation(data->bytes_req, data->bytes_alloc));

next = rb_next(next);
}

if (n_lines == -1)
- printf(" ... | ... | ... | ... | ... \n");
+ printf(" ... | ... | ... | ... | ... | ... \n");

- printf("%.78s\n", graph_dotted_line);
+ printf("%.102s\n", graph_dotted_line);
}

static void print_summary(void)
@@ -597,12 +648,27 @@ static struct sort_dimension frag_sort_dimension = {
.cmp = frag_cmp,
};

+static int pingpong_cmp(struct alloc_stat *l, struct alloc_stat *r)
+{
+ if (l->pingpong < r->pingpong)
+ return -1;
+ else if (l->pingpong > r->pingpong)
+ return 1;
+ return 0;
+}
+
+static struct sort_dimension pingpong_sort_dimension = {
+ .name = "pingpong",
+ .cmp = pingpong_cmp,
+};
+
static struct sort_dimension *avail_sorts[] = {
&ptr_sort_dimension,
&callsite_sort_dimension,
&hit_sort_dimension,
&bytes_sort_dimension,
&frag_sort_dimension,
+ &pingpong_sort_dimension,
};

#define NUM_AVAIL_SORTS \
@@ -703,7 +769,7 @@ static const struct option kmem_options[] = {
"stat selector, Pass 'alloc' or 'caller'.",
parse_stat_opt),
OPT_CALLBACK('s', "sort", NULL, "key[,key2...]",
- "sort by key(s): ptr, call_site, bytes, hit, frag",
+ "sort by keys: ptr, call_site, bytes, hit, pingpong, frag",
parse_sort_opt),
OPT_CALLBACK('l', "line", NULL, "num",
"show n lins",
--
1.6.3

2009-11-24 05:27:45

by Li Zefan

[permalink] [raw]
Subject: [PATCH 5/5] perf kmem: Add help file

Add Documentation/perf-kmem.txt

Signed-off-by: Li Zefan <[email protected]>
---
tools/perf/Documentation/perf-kmem.txt | 44 ++++++++++++++++++++++++++++++++
tools/perf/command-list.txt | 1 +
2 files changed, 45 insertions(+), 0 deletions(-)
create mode 100644 tools/perf/Documentation/perf-kmem.txt

diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt
new file mode 100644
index 0000000..44b0ce3
--- /dev/null
+++ b/tools/perf/Documentation/perf-kmem.txt
@@ -0,0 +1,44 @@
+perf-kmem(1)
+==============
+
+NAME
+----
+perf-kmem - Tool to trace/measure kernel memory(slab) properties
+
+SYNOPSIS
+--------
+[verse]
+'perf kmem' {record} [<options>]
+
+DESCRIPTION
+-----------
+There's two variants of perf kmem:
+
+ 'perf kmem record <command>' to record the kmem events
+ of an arbitrary workload.
+
+ 'perf kmem' to report kernel memory statistics.
+
+OPTIONS
+-------
+-i <file>::
+--input=<file>::
+ Select the input file (default: perf.data)
+
+--stat=<caller|alloc>::
+ Select per callsite or per allocation statistics
+
+-s <key[,key2...]>::
+--sort=<key[,key2...]>::
+ Sort the output (default: frag,hit,bytes)
+
+-l <num>::
+--line=<num>::
+ Print n lines only
+
+--raw-ip::
+ Print raw ip instead of symbol
+
+SEE ALSO
+--------
+linkperf:perf-record[1]
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt
index d3a6e18..02b09ea 100644
--- a/tools/perf/command-list.txt
+++ b/tools/perf/command-list.txt
@@ -14,3 +14,4 @@ perf-timechart mainporcelain common
perf-top mainporcelain common
perf-trace mainporcelain common
perf-probe mainporcelain common
+perf-kmem mainporcelain common
--
1.6.3

2009-11-24 07:15:08

by Pekka Enberg

[permalink] [raw]
Subject: Re: [PATCH 0/5] perf kmem: Add more functions and show more statistics

Hi Li,

On Tue, Nov 24, 2009 at 7:25 AM, Li Zefan <[email protected]> wrote:
> Pekka, do you think we can remove kmemtrace now?

One more use case I forgot to mention: boot time tracing. Much of the
persistent kernel memory footprint comes from the boot process which
is why it's important to be able to trace memory allocations
immediately after kmem_cache_init() has run. Can we make "perf kmem"
do that? Eduard put most of his efforts into making that work for
kmemtrace.

On Tue, Nov 24, 2009 at 7:25 AM, Li Zefan <[email protected]> wrote:
> With kmem trace events, low-level analyzing can be done using
> ftrace, and high-level analyzing can be done using perf-kmem.
>
> And chance is, more people may use and improve perf-kmem, and it
> will be well-maintained within the perf infrastructure. On the
> other hand, I guess few people use and contribute to kmemtrace-user.

Sure, I think "perf kmem" is the way forward. I'd love to hear
Eduard's comments on this before we remove the code from kernel. Do we
need to do that for 2.6.33 or can we postpone that for 2.6.34?

Pekka

2009-11-24 07:18:52

by Pekka Enberg

[permalink] [raw]
Subject: Re: [PATCH 0/5] perf kmem: Add more functions and show more statistics

On Tue, Nov 24, 2009 at 7:25 AM, Li Zefan <[email protected]> wrote:
> List of new things:
>
> - Add option "--raw-ip", to print raw ip instead of symbols.
>
> - Sort the output by fragmentation by default, and support
> multi keys.
>
> - Collect and show cross node allocation stats.
>
> - Collect and show alloc/free ping-pong stats.
>
> - And help file.

The series looks good to me!

Acked-by: Pekka Enberg <[email protected]>

2009-11-24 07:34:39

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 0/5] perf kmem: Add more functions and show more statistics


* Pekka Enberg <[email protected]> wrote:

> Hi Li,
>
> On Tue, Nov 24, 2009 at 7:25 AM, Li Zefan <[email protected]> wrote:
> > Pekka, do you think we can remove kmemtrace now?
>
> One more use case I forgot to mention: boot time tracing. Much of the
> persistent kernel memory footprint comes from the boot process which
> is why it's important to be able to trace memory allocations
> immediately after kmem_cache_init() has run. Can we make "perf kmem"
> do that? Eduard put most of his efforts into making that work for
> kmemtrace.

Would be lovely if someone looked at perf from that angle (and extended
it).

Another interesting area would be to allow a capture session without a
process context running immediately. (i.e. pre-allocate all the buffers,
use them, for a later 'perf save' to pick it up.)

The two are kind of the same thing conceptually: a boot time trace is a
preallocated 'process context less' recording, to be picked up after
bootup.

[ It also brings us 'stability/persistency of event logging' - i.e. a
capture session could be started and guaranteed by the kernel to be
underway, regardless of what user-space does. ]

Btw., Arjan is doing a _lot_ of boot time tracing for Moblin, and he
indicated it in the past that starting a perf recording session from an
initrd is a pretty practical substitute as well. (I've Cc:-ed Arjan.)

> On Tue, Nov 24, 2009 at 7:25 AM, Li Zefan <[email protected]> wrote:
>
> > With kmem trace events, low-level analyzing can be done using
> > ftrace, and high-level analyzing can be done using perf-kmem.
> >
> > And chance is, more people may use and improve perf-kmem, and it
> > will be well-maintained within the perf infrastructure. On the other
> > hand, I guess few people use and contribute to kmemtrace-user.
>
> Sure, I think "perf kmem" is the way forward. I'd love to hear
> Eduard's comments on this before we remove the code from kernel. Do we
> need to do that for 2.6.33 or can we postpone that for 2.6.34?

Certainly we can postpone it, as long as there's rough strategic
consensus on the way forward. I'd hate to have two overlapping core
kernel facilities and friction between the groups pursuing them and
constant distraction from having two targets.

Such situations just rarely end with a good solution for the user - see
security modules for a horror story ...

[ I dont think it will occur here, just wanted to mention it out of
abundance of caution that 1.5 decades of kernel hacking experience
inflicts on me ;-) ]

Ingo

2009-11-24 07:44:56

by Pekka Enberg

[permalink] [raw]
Subject: Re: [PATCH 0/5] perf kmem: Add more functions and show more statistics

Hi Ingo,

On Tue, Nov 24, 2009 at 9:34 AM, Ingo Molnar <[email protected]> wrote:
> Certainly we can postpone it, as long as there's rough strategic
> consensus on the way forward. I'd hate to have two overlapping core
> kernel facilities and friction between the groups pursuing them and
> constant distraction from having two targets.

Sure, like I said, I think "kmem perf" is the way forward. The only
reason we did kmemtrace userspace out-of-tree was because there was no
perf (or ftrace!) at the time and there wasn't much interest in
putting userspace tools in the tree.

I hope that counts as a "rough strategic consensus" :-)

2009-11-24 07:47:12

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 0/5] perf kmem: Add more functions and show more statistics


* Pekka Enberg <[email protected]> wrote:

> Hi Ingo,
>
> On Tue, Nov 24, 2009 at 9:34 AM, Ingo Molnar <[email protected]> wrote:
> > Certainly we can postpone it, as long as there's rough strategic
> > consensus on the way forward. I'd hate to have two overlapping core
> > kernel facilities and friction between the groups pursuing them and
> > constant distraction from having two targets.
>
> Sure, like I said, I think "kmem perf" is the way forward. The only
> reason we did kmemtrace userspace out-of-tree was because there was no
> perf (or ftrace!) at the time and there wasn't much interest in
> putting userspace tools in the tree.
>
> I hope that counts as a "rough strategic consensus" :-)

it does :-)

Ingo

2009-11-24 08:04:49

by Li Zefan

[permalink] [raw]
Subject: Re: [PATCH 0/5] perf kmem: Add more functions and show more statistics

Ingo Molnar wrote:
> * Pekka Enberg <[email protected]> wrote:
>
>> Hi Li,
>>
>> On Tue, Nov 24, 2009 at 7:25 AM, Li Zefan <[email protected]> wrote:
>>> Pekka, do you think we can remove kmemtrace now?
>> One more use case I forgot to mention: boot time tracing. Much of the
>> persistent kernel memory footprint comes from the boot process which
>> is why it's important to be able to trace memory allocations
>> immediately after kmem_cache_init() has run. Can we make "perf kmem"
>> do that? Eduard put most of his efforts into making that work for
>> kmemtrace.
>
> Would be lovely if someone looked at perf from that angle (and extended
> it).
>
> Another interesting area would be to allow a capture session without a
> process context running immediately. (i.e. pre-allocate all the buffers,
> use them, for a later 'perf save' to pick it up.)
>
> The two are kind of the same thing conceptually: a boot time trace is a
> preallocated 'process context less' recording, to be picked up after
> bootup.
>
> [ It also brings us 'stability/persistency of event logging' - i.e. a
> capture session could be started and guaranteed by the kernel to be
> underway, regardless of what user-space does. ]
>
> Btw., Arjan is doing a _lot_ of boot time tracing for Moblin, and he
> indicated it in the past that starting a perf recording session from an
> initrd is a pretty practical substitute as well. (I've Cc:-ed Arjan.)
>

It would be great if perf can be used for boot time tracing. This needs
pretty big work on the kernel side.

>> On Tue, Nov 24, 2009 at 7:25 AM, Li Zefan <[email protected]> wrote:
>>
>>> With kmem trace events, low-level analyzing can be done using
>>> ftrace, and high-level analyzing can be done using perf-kmem.
>>>
>>> And chance is, more people may use and improve perf-kmem, and it
>>> will be well-maintained within the perf infrastructure. On the other
>>> hand, I guess few people use and contribute to kmemtrace-user.
>> Sure, I think "perf kmem" is the way forward. I'd love to hear
>> Eduard's comments on this before we remove the code from kernel. Do we
>> need to do that for 2.6.33 or can we postpone that for 2.6.34?
>
> Certainly we can postpone it, as long as there's rough strategic
> consensus on the way forward. I'd hate to have two overlapping core
> kernel facilities and friction between the groups pursuing them and
> constant distraction from having two targets.
>
> Such situations just rarely end with a good solution for the user - see
> security modules for a horror story ...
>
> [ I dont think it will occur here, just wanted to mention it out of
> abundance of caution that 1.5 decades of kernel hacking experience
> inflicts on me ;-) ]
>

Yeah, so we'd like to remove most of the tracers, but I'm not rushing to
remove kmemtrace for .33.

2009-11-24 08:34:30

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 0/5] perf kmem: Add more functions and show more statistics


* Li Zefan <[email protected]> wrote:

> Ingo Molnar wrote:
> > * Pekka Enberg <[email protected]> wrote:
> >
> >> Hi Li,
> >>
> >> On Tue, Nov 24, 2009 at 7:25 AM, Li Zefan <[email protected]> wrote:
> >>> Pekka, do you think we can remove kmemtrace now?
> >> One more use case I forgot to mention: boot time tracing. Much of the
> >> persistent kernel memory footprint comes from the boot process which
> >> is why it's important to be able to trace memory allocations
> >> immediately after kmem_cache_init() has run. Can we make "perf kmem"
> >> do that? Eduard put most of his efforts into making that work for
> >> kmemtrace.
> >
> > Would be lovely if someone looked at perf from that angle (and extended
> > it).
> >
> > Another interesting area would be to allow a capture session without a
> > process context running immediately. (i.e. pre-allocate all the buffers,
> > use them, for a later 'perf save' to pick it up.)
> >
> > The two are kind of the same thing conceptually: a boot time trace is a
> > preallocated 'process context less' recording, to be picked up after
> > bootup.
> >
> > [ It also brings us 'stability/persistency of event logging' - i.e. a
> > capture session could be started and guaranteed by the kernel to be
> > underway, regardless of what user-space does. ]
> >
> > Btw., Arjan is doing a _lot_ of boot time tracing for Moblin, and he
> > indicated it in the past that starting a perf recording session from
> > an initrd is a pretty practical substitute as well. (I've Cc:-ed
> > Arjan.)
>
> It would be great if perf can be used for boot time tracing. This
> needs pretty big work on kernel side.

What would be needed is to open per cpu events right after perf events
initializes, and allocate memory for output buffers to them.

They would round-robin after that point, and we could use
perf_event_open() (with a special flag) to 'attach' to them and mmap()
them - at which point they'd turn into regular objects with a lot of
boot time data in them.

Perhaps it should be possible to attach/detach from events in a flexible
way, not just during bootup. (bootup tracing is just a special case of
it.)

For example a 'flight recorder' could be started by creating the events
and then detaching from them. Whenever some exception is flagged the
monitoring context (whatever task that might be at that time - at
whatever point in the future) could attach to it and save it to a file
(or send it over any other transport of choice).

This would IMO mix the best of ftrace with the best of perf, for these
usecases.

[ Certainly not a feature for the faint hearted :-) ]

Ingo

2009-11-24 09:04:34

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 0/5] perf kmem: Add more functions and show more statistics


a few more UI suggestions for 'perf kmem':

I think it should look similar to how 'perf' and 'perf sched' prints
sub-commands with increasing specificity, which means that we display a
list of subcommands and options when typed:

$ perf sched

usage: perf sched [<options>] {record|latency|map|replay|trace}

-i, --input <file> input file name
-v, --verbose be more verbose (show symbol address, etc)
-D, --dump-raw-trace dump raw trace in ASCII


For 'perf kmem' we could print something like:

$ perf kmem

usage: perf kmem [<options>] {record|report|trace}

-i, --input <file> input file name
-v, --verbose be more verbose (show symbol address, etc)
-D, --dump-raw-trace dump raw trace in ASCII

The advantage is that right now, when a new user sees the subcommand in
'perf' output:

$ perf
...
kmem Tool to trace/measure kernel memory(slab) properties
...

And types 'perf kmem', the following is displayed currently:

$ perf kmem

SUMMARY
=======
Total bytes requested: 0
Total bytes allocated: 0
Total bytes wasted on internal fragmentation: 0
Internal fragmentation: 0.000000%
Cross CPU allocations: 0/0

That's not very useful to someone who tries to figure out how to use
this command. A summary page would be more useful - and that would
advertise all the commands in a really short summary form (shorter than
-h/--help).

The other thing is that if someone types 'perf kmem record', the command
seems 'hung':

$ perf kmem record
<hang>

Now if i Ctrl-C it i see that a recording session was going on:

$ perf kmem record
^C[ perf record: Woken up 10 times to write data ]
[ perf record: Captured and wrote 1.327 MB perf.data (~57984 samples) ]

but this was not apparent from the tool output and the user was left
wondering about what is going on.

I think at minimum we should print a:

[ Recording all kmem events in the system, Ctrl-C to stop. ]

line. (on a related note, 'perf sched record' needs such a fix too.)

Another solution would be for 'perf kmem record' to work analogous to
'perf record': it could display a short help page by default, something
like:

$ perf kmem record

usage: perf kmem record [<options>] [<command>]

example: perf kmem record -a sleep 10 # capture all events for 10 seconds
perf kmem record /bin/ls # capture events of this command
perf kmem record -p 1234 # capture events of PID 1234

What do you think?

Also, a handful of mini-bugreports wrt. usability:

1)

running 'perf kmem' without having a perf.data gives:

earth4:~/tip/tools/perf> ./perf kmem
Failed to open file: perf.data (try 'perf record' first)

SUMMARY
=======
Total bytes requested: 0
Total bytes allocated: 0
Total bytes wasted on internal fragmentation: 0
Internal fragmentation: 0.000000%
Cross CPU allocations: 0/0

2)

running 'perf kmem record' on a box without kmem events gives:

earth4:~/tip/tools/perf> ./perf kmem record
invalid or unsupported event: 'kmem:kmalloc'
Run 'perf list' for a list of valid events

i think we want to print something kmem specific - and tell the user how
to enable kmem events or so - 'perf list' is not a solution to him.

3)

it doesnt seem to be working on one of my boxes, which has perf and kmem
events as well:

aldebaran:~/linux/linux/tools/perf> perf kmem record
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.050 MB perf.data (~2172 samples) ]

aldebaran:~/linux/linux/tools/perf> perf kmem

SUMMARY
=======
Total bytes requested: 0
Total bytes allocated: 0
Total bytes wasted on internal fragmentation: 0
Internal fragmentation: 0.000000%
Cross CPU allocations: 0/0
aldebaran:~/linux/linux/tools/perf>

Ingo

2009-11-24 09:39:11

by Li Zefan

[permalink] [raw]
Subject: Re: [PATCH 0/5] perf kmem: Add more functions and show more statistics

Ingo Molnar wrote:
> a few more UI suggestions for 'perf kmem':
>

Thanks for the suggestions!

> I think it should look similar to how 'perf' and 'perf sched' prints
> sub-commands with increasing specificity, which means that we display a
> list of subcommands and options when typed:
>

Yes, I'd like to make the usage and output format similar to perf-sched.

> $ perf sched
>
> usage: perf sched [<options>] {record|latency|map|replay|trace}
>
> -i, --input <file> input file name
> -v, --verbose be more verbose (show symbol address, etc)
> -D, --dump-raw-trace dump raw trace in ASCII
>
>
> For 'perf kmem' we could print something like:
>
> $ perf kmem
>
> usage: perf kmem [<options>] {record|report|trace}
>
> -i, --input <file> input file name
> -v, --verbose be more verbose (show symbol address, etc)
> -D, --dump-raw-trace dump raw trace in ASCII
>
> The advantage is that right now, when a new user sees the subcommand in
> 'perf' output:
>
> $ perf
> ...
> kmem Tool to trace/measure kernel memory(slab) properties
> ...
>
> And types 'perf kmem', the following is displayed currently:
>
> $ perf kmem
>
> SUMMARY
> =======
> Total bytes requested: 0
> Total bytes allocated: 0
> Total bytes wasted on internal fragmentation: 0
> Internal fragmentation: 0.000000%
> Cross CPU allocations: 0/0
>
> That's not very useful to someone who tries to figure out how to use
> this command. A summary page would be more useful - and that would
> advertise all the commands in a really short summary form (shorter than
> -h/--help).
>

perf-timechart acts similarly - it won't show a help page when invoked as "perf timechart"

# ./perf timechart
0xbc480 [0x18]: skipping unknown header type: 2
0xbc488 [(nil)]: skipping unknown header type: 238
0xbc490 [(nil)]: skipping unknown header type: 20034
Written 1.0 seconds of trace to output.svg.

But sure, I can change this for perf-kmem. So, do we want to do the same
for perf-timechart too?

> The other thing is that if someone types 'perf kmem record', the command
> seems 'hung':
>
> $ perf kmem record
> <hang>
>
> Now if i Ctrl-C it i see that a recording session was going on:
>
> $ perf kmem record
> ^C[ perf record: Woken up 10 times to write data ]
> [ perf record: Captured and wrote 1.327 MB perf.data (~57984 samples) ]
>
> but this was not apparent from the tool output and the user was left
> wondering about what is going on.
>
> I think at minimum we should print a:
>
> [ Recording all kmem events in the system, Ctrl-C to stop. ]
>
> line. (on a related note, 'perf sched record' needs such a fix too.)
>

Yes, I followed perf-sched and perf-timechart. ;)

I'll fix it for these tools.

> Another solution would be for 'perf kmem record' to work analogous to
> 'perf record': it could display a short help page by default, something
> like:
>
> $ perf kmem record
>
> usage: perf kmem record [<options>] [<command>]
>
> example: perf kmem record -a sleep 10 # capture all events for 10 seconds
> perf kmem record /bin/ls # capture events of this command
> perf kmem record -p 1234 # capture events of PID 1234
>
> What do you think?
>

But I'm not sure I like this, actually I prefer to just print
a line to explain what's going on.

> Also, a handful of mini-bugreports wrt. usability:
>
> 1)
>
> running 'perf kmem' without having a perf.data gives:
>
> earth4:~/tip/tools/perf> ./perf kmem
> Failed to open file: perf.data (try 'perf record' first)
>
> SUMMARY
> =======
> Total bytes requested: 0
> Total bytes allocated: 0
> Total bytes wasted on internal fragmentation: 0
> Internal fragmentation: 0.000000%
> Cross CPU allocations: 0/0
>

Again, this issue exists in perf-sched too..

So we need to fix not only perf-kmem.

> 2)
>
> running 'perf kmem record' on a box without kmem events gives:
>
> earth4:~/tip/tools/perf> ./perf kmem record
> invalid or unsupported event: 'kmem:kmalloc'
> Run 'perf list' for a list of valid events
>
> i think we want to print something kmem specific - and tell the user how
> to enable kmem events or so - 'perf list' is not a solution to him.
>

ditto

> 3)
>
> it doesnt seem to be working on one of my boxes, which has perf and kmem
> events as well:
>
> aldebaran:~/linux/linux/tools/perf> perf kmem record
> ^C[ perf record: Woken up 1 times to write data ]
> [ perf record: Captured and wrote 0.050 MB perf.data (~2172 samples) ]
>

Seems no kmem event is recorded. Not sure what happened here.

Might be that the parameters that perf-kmem passes to perf-record
are not properly selected?

Do perf-sched and perf-timechart work on this box?

> aldebaran:~/linux/linux/tools/perf> perf kmem
>
> SUMMARY
> =======
> Total bytes requested: 0
> Total bytes allocated: 0
> Total bytes wasted on internal fragmentation: 0
> Internal fragmentation: 0.000000%
> Cross CPU allocations: 0/0
> aldebaran:~/linux/linux/tools/perf>
>

2009-11-24 10:07:46

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 0/5] perf kmem: Add more functions and show more statistics


* Li Zefan <[email protected]> wrote:

> > 3)
> >
> > it doesnt seem to be working on one of my boxes, which has perf and kmem
> > events as well:
> >
> > aldebaran:~/linux/linux/tools/perf> perf kmem record
> > ^C[ perf record: Woken up 1 times to write data ]
> > [ perf record: Captured and wrote 0.050 MB perf.data (~2172 samples) ]
> >
>
> Seems no kmem event is recorded. No sure what happened here.
>
> Might be that the parameters that perf-kmem passes to perf-record
> are not properly selected?
>
> Do perf-sched and perf-timechart work on this box?

yeah:

aldebaran:~> perf sched record sleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.017 MB perf.data (~758 samples) ]
aldebaran:~> perf trace | tail -5
distccd-20944 [010] 1792.787376: sched_stat_runtime: comm=distccd pid=20944 runtime=11196 [ns] vruntime=696395420043 [ns]
init-0 [009] 1792.914837: sched_stat_wait: comm=x86_64-linux-gc pid=881 delay=10686 [ns]
init-0 [009] 1792.915082: sched_stat_sleep: comm=events/9 pid=44 delay=2183651362 [ns]
as-889 [013] 1793.008008: sched_stat_runtime: comm=as pid=889 runtime=156807 [ns] vruntime=1553569219042 [ns]
init-0 [004] 1793.154400: sched_stat_wait: comm=events/4 pid=39 delay=12155 [ns]

aldebaran:~> perf kmem record sleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.078 MB perf.data (~3398 samples) ]
aldebaran:~> perf trace | tail -5
aldebaran:~>

the perf.data has mmap and exit events - but no kmem events.

I've attached the config, in case it matters. It runs latest -tip, with
your latest series applied as well.

Ingo


Attachments:
(No filename) (1.68 kB)
config (65.97 kB)
Download all attachments

2009-11-24 11:05:04

by Li Zefan

[permalink] [raw]
Subject: Re: [PATCH 0/5] perf kmem: Add more functions and show more statistics

>> Do perf-sched and perf-timechart work on this box?
>
> yeah:
>
> aldebaran:~> perf sched record sleep 1
> [ perf record: Woken up 1 times to write data ]
> [ perf record: Captured and wrote 0.017 MB perf.data (~758 samples) ]
> aldebaran:~> perf trace | tail -5
> distccd-20944 [010] 1792.787376: sched_stat_runtime: comm=distccd pid=20944 runtime=11196 [ns] vruntime=696395420043 [ns]
> init-0 [009] 1792.914837: sched_stat_wait: comm=x86_64-linux-gc pid=881 delay=10686 [ns]
> init-0 [009] 1792.915082: sched_stat_sleep: comm=events/9 pid=44 delay=2183651362 [ns]
> as-889 [013] 1793.008008: sched_stat_runtime: comm=as pid=889 runtime=156807 [ns] vruntime=1553569219042 [ns]
> init-0 [004] 1793.154400: sched_stat_wait: comm=events/4 pid=39 delay=12155 [ns]
>
> aldebaran:~> perf kmem record sleep 1
> [ perf record: Woken up 1 times to write data ]
> [ perf record: Captured and wrote 0.078 MB perf.data (~3398 samples) ]
> aldebaran:~> perf trace | tail -5
> aldebaran:~>
>
> the perf.data has mmap and exit events - but no kmem events.
>

I was using yesterday's -tip tree, and I just updated to the latest -tip,
and I found perf tools are not working:

# ./perf kmem record sleep 3
...
# ./perf trace
perf-1805 [001] 66.239160: kmem_cache_free: ...
perf-1806 [000] 66.403561: kmem_cache_alloc: ...
swapper-0 [000] 66.420099: kmem_cache_free: ...
# ./perf kmem record sleep 3
...
# ./perf trace
# ./perf sched record sleep 3
...
# ./perf trace
perf-1825 [000] 103.543014: sched_stat_runtime: ...
# ./perf sched record sleep 3
...
# ./perf trace
#

So I think some recent updates to kernel perf_event broke things.

2009-11-24 14:56:08

by Arjan van de Ven

[permalink] [raw]
Subject: Re: [PATCH 0/5] perf kmem: Add more functions and show more statistics

On Tue, 24 Nov 2009 09:34:23 +0100
Ingo Molnar <[email protected]> wrote:
> > It would be great if perf can be used for boot time tracing. This
> > needs pretty big work on kernel side.
>
> What would be needed is to open per cpu events right after perf
> events initializes, and allocate memory for output buffers to them.
>
> They would round-robin after that point, and we could use
> perf_event_open() (with a special flag) to 'attach' to them and
> mmap() them - at which point they'd turn into regular objects with a
> lot of boot time data in them.

I'm not too worried about this btw;
we can start the userland trace early enough in the boot (the kernel is
done after 0.6 seconds after all) to capture the relevant stuff.
The actual kernel mostly gets captured with scripts/bootgraph.pl
already.

Yes it would be nice to do a timechart earlier, but if it's extremely
hard...
Also unless it starts before the drivers (eg the normal driver
initcall level), it is not useful.

2009-11-24 16:56:28

by Li Zefan

[permalink] [raw]
Subject: [tip:perf/core] perf kmem: Add new option to show raw ip

Commit-ID: 7707b6b6f8d9188b612f9fc88c65411264b1ed57
Gitweb: http://git.kernel.org/tip/7707b6b6f8d9188b612f9fc88c65411264b1ed57
Author: Li Zefan <[email protected]>
AuthorDate: Tue, 24 Nov 2009 13:25:48 +0800
Committer: Ingo Molnar <[email protected]>
CommitDate: Tue, 24 Nov 2009 08:49:49 +0100

perf kmem: Add new option to show raw ip

Add option "--raw-ip" to show raw ip instead of symbols:

# ./perf kmem --stat caller --raw-ip
------------------------------------------------------------------------------
Callsite |Total_alloc/Per | Total_req/Per | Hit | Frag
------------------------------------------------------------------------------
0xc05301aa | 733184/4096 | 733184/4096 | 179| 0.000%
0xc0542ba0 | 483328/4096 | 483328/4096 | 118| 0.000%
...

Also show symbols with format sym+offset instead of sym/offset.

Signed-off-by: Li Zefan <[email protected]>
Acked-by: Pekka Enberg <[email protected]>
Cc: Eduard - Gabriel Munteanu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: [email protected] <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
tools/perf/builtin-kmem.c | 18 ++++++++++--------
1 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 256d18f..1ef43c2 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -32,15 +32,14 @@ sort_fn_t caller_sort_fn;
static int alloc_lines = -1;
static int caller_lines = -1;

+static bool raw_ip;
+
static char *cwd;
static int cwdlen;

struct alloc_stat {
union {
- struct {
- char *name;
- u64 call_site;
- };
+ u64 call_site;
u64 ptr;
};
u64 bytes_req;
@@ -323,12 +322,14 @@ static void __print_result(struct rb_root *root, int n_lines, int is_caller)

if (is_caller) {
addr = data->call_site;
- sym = kernel_maps__find_symbol(addr, NULL, NULL);
+ if (!raw_ip)
+ sym = kernel_maps__find_symbol(addr,
+ NULL, NULL);
} else
addr = data->ptr;

if (sym != NULL)
- snprintf(bf, sizeof(bf), "%s/%Lx", sym->name,
+ snprintf(bf, sizeof(bf), "%s+%Lx", sym->name,
addr - sym->start);
else
snprintf(bf, sizeof(bf), "%#Lx", addr);
@@ -345,9 +346,9 @@ static void __print_result(struct rb_root *root, int n_lines, int is_caller)
}

if (n_lines == -1)
- printf(" ... | ... | ... | ... | ... \n");
+ printf(" ... | ... | ... | ... | ... \n");

- printf(" ------------------------------------------------------------------------------\n");
+ printf("%.78s\n", graph_dotted_line);
}

static void print_summary(void)
@@ -558,6 +559,7 @@ static const struct option kmem_options[] = {
OPT_CALLBACK('l', "line", NULL, "num",
"show n lins",
parse_line_opt),
+ OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
OPT_END()
};

2009-11-24 16:56:11

by Li Zefan

[permalink] [raw]
Subject: [tip:perf/core] perf kmem: Default to sort by fragmentation

Commit-ID: 29b3e15289eb66788a0bf5ea4903f9fbeb1ec751
Gitweb: http://git.kernel.org/tip/29b3e15289eb66788a0bf5ea4903f9fbeb1ec751
Author: Li Zefan <[email protected]>
AuthorDate: Tue, 24 Nov 2009 13:26:10 +0800
Committer: Ingo Molnar <[email protected]>
CommitDate: Tue, 24 Nov 2009 08:49:49 +0100

perf kmem: Default to sort by fragmentation

Make the output sort by fragmentation by default.

Also make the usage of "--sort" option consistent with other
perf tools. That is, we support multi keys: "--sort
key1[,key2]...".

# ./perf kmem --stat caller
------------------------------------------------------------------------------
Callsite |Total_alloc/Per | Total_req/Per | Hit | Frag
------------------------------------------------------------------------------
__netdev_alloc_skb+23 | 5048/1682 | 4564/1521 | 3| 9.588%
perf_event_alloc.clone.0+0 | 7504/682 | 7128/648 | 11| 5.011%
tracepoint_add_probe+32e | 157/31 | 154/30 | 5| 1.911%
alloc_buffer_head+16 | 456/57 | 448/56 | 8| 1.754%
radix_tree_preload+51 | 584/292 | 576/288 | 2| 1.370%
...

TODO:
- Extract duplicate code in builtin-kmem.c and builtin-sched.c
into util/sort.c.

Signed-off-by: Li Zefan <[email protected]>
Acked-by: Pekka Enberg <[email protected]>
Cc: Eduard - Gabriel Munteanu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: [email protected] <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
tools/perf/builtin-kmem.c | 142 ++++++++++++++++++++++++++++++++++-----------
1 files changed, 108 insertions(+), 34 deletions(-)

diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 1ef43c2..dc86f1e 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -26,14 +26,13 @@ static u64 sample_type;
static int alloc_flag;
static int caller_flag;

-sort_fn_t alloc_sort_fn;
-sort_fn_t caller_sort_fn;
-
static int alloc_lines = -1;
static int caller_lines = -1;

static bool raw_ip;

+static char default_sort_order[] = "frag,hit,bytes";
+
static char *cwd;
static int cwdlen;

@@ -371,20 +370,34 @@ static void print_result(void)
print_summary();
}

+struct sort_dimension {
+ const char name[20];
+ sort_fn_t cmp;
+ struct list_head list;
+};
+
+static LIST_HEAD(caller_sort);
+static LIST_HEAD(alloc_sort);
+
static void sort_insert(struct rb_root *root, struct alloc_stat *data,
- sort_fn_t sort_fn)
+ struct list_head *sort_list)
{
struct rb_node **new = &(root->rb_node);
struct rb_node *parent = NULL;
+ struct sort_dimension *sort;

while (*new) {
struct alloc_stat *this;
- int cmp;
+ int cmp = 0;

this = rb_entry(*new, struct alloc_stat, node);
parent = *new;

- cmp = sort_fn(data, this);
+ list_for_each_entry(sort, sort_list, list) {
+ cmp = sort->cmp(data, this);
+ if (cmp)
+ break;
+ }

if (cmp > 0)
new = &((*new)->rb_left);
@@ -397,7 +410,7 @@ static void sort_insert(struct rb_root *root, struct alloc_stat *data,
}

static void __sort_result(struct rb_root *root, struct rb_root *root_sorted,
- sort_fn_t sort_fn)
+ struct list_head *sort_list)
{
struct rb_node *node;
struct alloc_stat *data;
@@ -409,14 +422,14 @@ static void __sort_result(struct rb_root *root, struct rb_root *root_sorted,

rb_erase(node, root);
data = rb_entry(node, struct alloc_stat, node);
- sort_insert(root_sorted, data, sort_fn);
+ sort_insert(root_sorted, data, sort_list);
}
}

static void sort_result(void)
{
- __sort_result(&root_alloc_stat, &root_alloc_sorted, alloc_sort_fn);
- __sort_result(&root_caller_stat, &root_caller_sorted, caller_sort_fn);
+ __sort_result(&root_alloc_stat, &root_alloc_sorted, &alloc_sort);
+ __sort_result(&root_caller_stat, &root_caller_sorted, &caller_sort);
}

static int __cmd_kmem(void)
@@ -434,7 +447,6 @@ static const char * const kmem_usage[] = {
NULL
};

-
static int ptr_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
if (l->ptr < r->ptr)
@@ -444,6 +456,11 @@ static int ptr_cmp(struct alloc_stat *l, struct alloc_stat *r)
return 0;
}

+static struct sort_dimension ptr_sort_dimension = {
+ .name = "ptr",
+ .cmp = ptr_cmp,
+};
+
static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
if (l->call_site < r->call_site)
@@ -453,6 +470,11 @@ static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r)
return 0;
}

+static struct sort_dimension callsite_sort_dimension = {
+ .name = "callsite",
+ .cmp = callsite_cmp,
+};
+
static int hit_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
if (l->hit < r->hit)
@@ -462,6 +484,11 @@ static int hit_cmp(struct alloc_stat *l, struct alloc_stat *r)
return 0;
}

+static struct sort_dimension hit_sort_dimension = {
+ .name = "hit",
+ .cmp = hit_cmp,
+};
+
static int bytes_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
if (l->bytes_alloc < r->bytes_alloc)
@@ -471,6 +498,11 @@ static int bytes_cmp(struct alloc_stat *l, struct alloc_stat *r)
return 0;
}

+static struct sort_dimension bytes_sort_dimension = {
+ .name = "bytes",
+ .cmp = bytes_cmp,
+};
+
static int frag_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
double x, y;
@@ -485,31 +517,73 @@ static int frag_cmp(struct alloc_stat *l, struct alloc_stat *r)
return 0;
}

+static struct sort_dimension frag_sort_dimension = {
+ .name = "frag",
+ .cmp = frag_cmp,
+};
+
+static struct sort_dimension *avail_sorts[] = {
+ &ptr_sort_dimension,
+ &callsite_sort_dimension,
+ &hit_sort_dimension,
+ &bytes_sort_dimension,
+ &frag_sort_dimension,
+};
+
+#define NUM_AVAIL_SORTS \
+ (int)(sizeof(avail_sorts) / sizeof(struct sort_dimension *))
+
+static int sort_dimension__add(const char *tok, struct list_head *list)
+{
+ struct sort_dimension *sort;
+ int i;
+
+ for (i = 0; i < NUM_AVAIL_SORTS; i++) {
+ if (!strcmp(avail_sorts[i]->name, tok)) {
+ sort = malloc(sizeof(*sort));
+ if (!sort)
+ die("malloc");
+ memcpy(sort, avail_sorts[i], sizeof(*sort));
+ list_add_tail(&sort->list, list);
+ return 0;
+ }
+ }
+
+ return -1;
+}
+
+static int setup_sorting(struct list_head *sort_list, const char *arg)
+{
+ char *tok;
+ char *str = strdup(arg);
+
+ if (!str)
+ die("strdup");
+
+ while (true) {
+ tok = strsep(&str, ",");
+ if (!tok)
+ break;
+ if (sort_dimension__add(tok, sort_list) < 0) {
+ error("Unknown --sort key: '%s'", tok);
+ return -1;
+ }
+ }
+
+ free(str);
+ return 0;
+}
+
static int parse_sort_opt(const struct option *opt __used,
const char *arg, int unset __used)
{
- sort_fn_t sort_fn;
-
if (!arg)
return -1;

- if (strcmp(arg, "ptr") == 0)
- sort_fn = ptr_cmp;
- else if (strcmp(arg, "call_site") == 0)
- sort_fn = callsite_cmp;
- else if (strcmp(arg, "hit") == 0)
- sort_fn = hit_cmp;
- else if (strcmp(arg, "bytes") == 0)
- sort_fn = bytes_cmp;
- else if (strcmp(arg, "frag") == 0)
- sort_fn = frag_cmp;
- else
- return -1;
-
if (caller_flag > alloc_flag)
- caller_sort_fn = sort_fn;
+ return setup_sorting(&caller_sort, arg);
else
- alloc_sort_fn = sort_fn;
+ return setup_sorting(&alloc_sort, arg);

return 0;
}
@@ -553,8 +627,8 @@ static const struct option kmem_options[] = {
OPT_CALLBACK(0, "stat", NULL, "<alloc>|<caller>",
"stat selector, Pass 'alloc' or 'caller'.",
parse_stat_opt),
- OPT_CALLBACK('s', "sort", NULL, "key",
- "sort by key: ptr, call_site, hit, bytes, frag",
+ OPT_CALLBACK('s', "sort", NULL, "key[,key2...]",
+ "sort by key(s): ptr, call_site, bytes, hit, frag",
parse_sort_opt),
OPT_CALLBACK('l', "line", NULL, "num",
"show n lins",
@@ -606,10 +680,10 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __used)
else if (argc)
usage_with_options(kmem_usage, kmem_options);

- if (!alloc_sort_fn)
- alloc_sort_fn = bytes_cmp;
- if (!caller_sort_fn)
- caller_sort_fn = bytes_cmp;
+ if (list_empty(&caller_sort))
+ setup_sorting(&caller_sort, default_sort_order);
+ if (list_empty(&alloc_sort))
+ setup_sorting(&alloc_sort, default_sort_order);

return __cmd_kmem();
}

2009-11-24 16:56:27

by Li Zefan

[permalink] [raw]
Subject: [tip:perf/core] perf kmem: Collect cross node allocation statistics

Commit-ID: 7d0d39459dab20bf60cac30a1a7d50b286c60cc1
Gitweb: http://git.kernel.org/tip/7d0d39459dab20bf60cac30a1a7d50b286c60cc1
Author: Li Zefan <[email protected]>
AuthorDate: Tue, 24 Nov 2009 13:26:31 +0800
Committer: Ingo Molnar <[email protected]>
CommitDate: Tue, 24 Nov 2009 08:49:50 +0100

perf kmem: Collect cross node allocation statistics

Show cross node memory allocations:

# ./perf kmem

SUMMARY
=======
...
Cross node allocations: 0/3633

Signed-off-by: Li Zefan <[email protected]>
Acked-by: Pekka Enberg <[email protected]>
Cc: Eduard - Gabriel Munteanu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: [email protected] <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
tools/perf/builtin-kmem.c | 81 +++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 79 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index dc86f1e..1ecf3f4 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -36,6 +36,9 @@ static char default_sort_order[] = "frag,hit,bytes";
static char *cwd;
static int cwdlen;

+static int *cpunode_map;
+static int max_cpu_num;
+
struct alloc_stat {
union {
u64 call_site;
@@ -54,12 +57,74 @@ static struct rb_root root_caller_stat;
static struct rb_root root_caller_sorted;

static unsigned long total_requested, total_allocated;
+static unsigned long nr_allocs, nr_cross_allocs;

struct raw_event_sample {
u32 size;
char data[0];
};

+#define PATH_SYS_NODE "/sys/devices/system/node"
+
+static void init_cpunode_map(void)
+{
+ FILE *fp;
+ int i;
+
+ fp = fopen("/sys/devices/system/cpu/kernel_max", "r");
+ if (!fp) {
+ max_cpu_num = 4096;
+ return;
+ }
+
+ if (fscanf(fp, "%d", &max_cpu_num) < 1)
+ die("Failed to read 'kernel_max' from sysfs");
+ max_cpu_num++;
+
+ cpunode_map = calloc(max_cpu_num, sizeof(int));
+ if (!cpunode_map)
+ die("calloc");
+ for (i = 0; i < max_cpu_num; i++)
+ cpunode_map[i] = -1;
+ fclose(fp);
+}
+
+static void setup_cpunode_map(void)
+{
+ struct dirent *dent1, *dent2;
+ DIR *dir1, *dir2;
+ unsigned int cpu, mem;
+ char buf[PATH_MAX];
+
+ init_cpunode_map();
+
+ dir1 = opendir(PATH_SYS_NODE);
+ if (!dir1)
+ return;
+
+ while (true) {
+ dent1 = readdir(dir1);
+ if (!dent1)
+ break;
+
+ if (sscanf(dent1->d_name, "node%u", &mem) < 1)
+ continue;
+
+ snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name);
+ dir2 = opendir(buf);
+ if (!dir2)
+ continue;
+ while (true) {
+ dent2 = readdir(dir2);
+ if (!dent2)
+ break;
+ if (sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
+ continue;
+ cpunode_map[cpu] = mem;
+ }
+ }
+}
+
static int
process_comm_event(event_t *event, unsigned long offset, unsigned long head)
{
@@ -157,15 +222,16 @@ static void insert_caller_stat(unsigned long call_site,

static void process_alloc_event(struct raw_event_sample *raw,
struct event *event,
- int cpu __used,
+ int cpu,
u64 timestamp __used,
struct thread *thread __used,
- int node __used)
+ int node)
{
unsigned long call_site;
unsigned long ptr;
int bytes_req;
int bytes_alloc;
+ int node1, node2;

ptr = raw_field_value(event, "ptr", raw->data);
call_site = raw_field_value(event, "call_site", raw->data);
@@ -177,6 +243,14 @@ static void process_alloc_event(struct raw_event_sample *raw,

total_requested += bytes_req;
total_allocated += bytes_alloc;
+
+ if (node) {
+ node1 = cpunode_map[cpu];
+ node2 = raw_field_value(event, "node", raw->data);
+ if (node1 != node2)
+ nr_cross_allocs++;
+ }
+ nr_allocs++;
}

static void process_free_event(struct raw_event_sample *raw __used,
@@ -359,6 +433,7 @@ static void print_summary(void)
total_allocated - total_requested);
printf("Internal fragmentation: %f%%\n",
fragmentation(total_requested, total_allocated));
+ printf("Cross CPU allocations: %lu/%lu\n", nr_cross_allocs, nr_allocs);
}

static void print_result(void)
@@ -685,6 +760,8 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __used)
if (list_empty(&alloc_sort))
setup_sorting(&alloc_sort, default_sort_order);

+ setup_cpunode_map();
+
return __cmd_kmem();
}

2009-11-24 16:56:38

by Li Zefan

[permalink] [raw]
Subject: [tip:perf/core] perf kmem: Measure kmalloc/kfree CPU ping-pong call-sites

Commit-ID: 079d3f653134e2f2ac99dae28b08c0cc64268103
Gitweb: http://git.kernel.org/tip/079d3f653134e2f2ac99dae28b08c0cc64268103
Author: Li Zefan <[email protected]>
AuthorDate: Tue, 24 Nov 2009 13:26:55 +0800
Committer: Ingo Molnar <[email protected]>
CommitDate: Tue, 24 Nov 2009 08:49:50 +0100

perf kmem: Measure kmalloc/kfree CPU ping-pong call-sites

Show statistics for allocations and frees on different cpus:

------------------------------------------------------------------------------------------------------
Callsite | Total_alloc/Per | Total_req/Per | Hit | Ping-pong | Frag
------------------------------------------------------------------------------------------------------
perf_event_alloc.clone.0+0 | 7504/682 | 7128/648 | 11 | 0 | 5.011%
alloc_buffer_head+16 | 288/57 | 280/56 | 5 | 0 | 2.778%
radix_tree_preload+51 | 296/296 | 288/288 | 1 | 0 | 2.703%
tracepoint_add_probe+32e | 157/31 | 154/30 | 5 | 0 | 1.911%
do_maps_open+0 | 796/12 | 792/12 | 66 | 0 | 0.503%
sock_alloc_send_pskb+16e | 23780/495 | 23744/494 | 48 | 38 | 0.151%
anon_vma_prepare+9a | 3744/44 | 3740/44 | 85 | 0 | 0.107%
d_alloc+21 | 64948/164 | 64944/164 | 396 | 0 | 0.006%
proc_alloc_inode+23 | 262292/676 | 262288/676 | 388 | 0 | 0.002%
create_object+28 | 459600/200 | 459600/200 | 2298 | 71 | 0.000%
journal_start+67 | 14440/40 | 14440/40 | 361 | 0 | 0.000%
get_empty_filp+df | 53504/256 | 53504/256 | 209 | 0 | 0.000%
getname+2a | 823296/4096 | 823296/4096 | 201 | 0 | 0.000%
seq_read+2b0 | 544768/4096 | 544768/4096 | 133 | 0 | 0.000%
seq_open+6d | 17024/128 | 17024/128 | 133 | 0 | 0.000%
mmap_region+2e6 | 11704/88 | 11704/88 | 133 | 0 | 0.000%
single_open+0 | 1072/16 | 1072/16 | 67 | 0 | 0.000%
__alloc_skb+2e | 12544/256 | 12544/256 | 49 | 38 | 0.000%
__sigqueue_alloc+4a | 1296/144 | 1296/144 | 9 | 8 | 0.000%
tracepoint_add_probe+6f | 80/16 | 80/16 | 5 | 0 | 0.000%
------------------------------------------------------------------------------------------------------
...

Signed-off-by: Li Zefan <[email protected]>
Acked-by: Pekka Enberg <[email protected]>
Cc: Eduard - Gabriel Munteanu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: [email protected] <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
tools/perf/builtin-kmem.c | 122 ++++++++++++++++++++++++++++++++++----------
1 files changed, 94 insertions(+), 28 deletions(-)

diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 1ecf3f4..173d6db 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -40,13 +40,14 @@ static int *cpunode_map;
static int max_cpu_num;

struct alloc_stat {
- union {
- u64 call_site;
- u64 ptr;
- };
+ u64 call_site;
+ u64 ptr;
u64 bytes_req;
u64 bytes_alloc;
u32 hit;
+ u32 pingpong;
+
+ short alloc_cpu;

struct rb_node node;
};
@@ -144,16 +145,13 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head)
return 0;
}

-static void insert_alloc_stat(unsigned long ptr,
- int bytes_req, int bytes_alloc)
+static void insert_alloc_stat(unsigned long call_site, unsigned long ptr,
+ int bytes_req, int bytes_alloc, int cpu)
{
struct rb_node **node = &root_alloc_stat.rb_node;
struct rb_node *parent = NULL;
struct alloc_stat *data = NULL;

- if (!alloc_flag)
- return;
-
while (*node) {
parent = *node;
data = rb_entry(*node, struct alloc_stat, node);
@@ -172,7 +170,10 @@ static void insert_alloc_stat(unsigned long ptr,
data->bytes_alloc += bytes_req;
} else {
data = malloc(sizeof(*data));
+ if (!data)
+ die("malloc");
data->ptr = ptr;
+ data->pingpong = 0;
data->hit = 1;
data->bytes_req = bytes_req;
data->bytes_alloc = bytes_alloc;
@@ -180,6 +181,8 @@ static void insert_alloc_stat(unsigned long ptr,
rb_link_node(&data->node, parent, node);
rb_insert_color(&data->node, &root_alloc_stat);
}
+ data->call_site = call_site;
+ data->alloc_cpu = cpu;
}

static void insert_caller_stat(unsigned long call_site,
@@ -189,9 +192,6 @@ static void insert_caller_stat(unsigned long call_site,
struct rb_node *parent = NULL;
struct alloc_stat *data = NULL;

- if (!caller_flag)
- return;
-
while (*node) {
parent = *node;
data = rb_entry(*node, struct alloc_stat, node);
@@ -210,7 +210,10 @@ static void insert_caller_stat(unsigned long call_site,
data->bytes_alloc += bytes_req;
} else {
data = malloc(sizeof(*data));
+ if (!data)
+ die("malloc");
data->call_site = call_site;
+ data->pingpong = 0;
data->hit = 1;
data->bytes_req = bytes_req;
data->bytes_alloc = bytes_alloc;
@@ -238,7 +241,7 @@ static void process_alloc_event(struct raw_event_sample *raw,
bytes_req = raw_field_value(event, "bytes_req", raw->data);
bytes_alloc = raw_field_value(event, "bytes_alloc", raw->data);

- insert_alloc_stat(ptr, bytes_req, bytes_alloc);
+ insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, cpu);
insert_caller_stat(call_site, bytes_req, bytes_alloc);

total_requested += bytes_req;
@@ -253,12 +256,58 @@ static void process_alloc_event(struct raw_event_sample *raw,
nr_allocs++;
}

-static void process_free_event(struct raw_event_sample *raw __used,
- struct event *event __used,
- int cpu __used,
+static int ptr_cmp(struct alloc_stat *, struct alloc_stat *);
+static int callsite_cmp(struct alloc_stat *, struct alloc_stat *);
+
+static struct alloc_stat *search_alloc_stat(unsigned long ptr,
+ unsigned long call_site,
+ struct rb_root *root,
+ sort_fn_t sort_fn)
+{
+ struct rb_node *node = root->rb_node;
+ struct alloc_stat key = { .ptr = ptr, .call_site = call_site };
+
+ while (node) {
+ struct alloc_stat *data;
+ int cmp;
+
+ data = rb_entry(node, struct alloc_stat, node);
+
+ cmp = sort_fn(&key, data);
+ if (cmp < 0)
+ node = node->rb_left;
+ else if (cmp > 0)
+ node = node->rb_right;
+ else
+ return data;
+ }
+ return NULL;
+}
+
+static void process_free_event(struct raw_event_sample *raw,
+ struct event *event,
+ int cpu,
u64 timestamp __used,
struct thread *thread __used)
{
+ unsigned long ptr;
+ struct alloc_stat *s_alloc, *s_caller;
+
+ ptr = raw_field_value(event, "ptr", raw->data);
+
+ s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp);
+ if (!s_alloc)
+ return;
+
+ if (cpu != s_alloc->alloc_cpu) {
+ s_alloc->pingpong++;
+
+ s_caller = search_alloc_stat(0, s_alloc->call_site,
+ &root_caller_stat, callsite_cmp);
+ assert(s_caller);
+ s_caller->pingpong++;
+ }
+ s_alloc->alloc_cpu = -1;
}

static void
@@ -379,10 +428,10 @@ static void __print_result(struct rb_root *root, int n_lines, int is_caller)
{
struct rb_node *next;

- printf("%.78s\n", graph_dotted_line);
- printf("%-28s|", is_caller ? "Callsite": "Alloc Ptr");
- printf("Total_alloc/Per | Total_req/Per | Hit | Frag\n");
- printf("%.78s\n", graph_dotted_line);
+ printf("%.102s\n", graph_dotted_line);
+ printf(" %-34s |", is_caller ? "Callsite": "Alloc Ptr");
+ printf(" Total_alloc/Per | Total_req/Per | Hit | Ping-pong | Frag\n");
+ printf("%.102s\n", graph_dotted_line);

next = rb_first(root);

@@ -390,7 +439,7 @@ static void __print_result(struct rb_root *root, int n_lines, int is_caller)
struct alloc_stat *data = rb_entry(next, struct alloc_stat,
node);
struct symbol *sym = NULL;
- char bf[BUFSIZ];
+ char buf[BUFSIZ];
u64 addr;

if (is_caller) {
@@ -402,26 +451,28 @@ static void __print_result(struct rb_root *root, int n_lines, int is_caller)
addr = data->ptr;

if (sym != NULL)
- snprintf(bf, sizeof(bf), "%s+%Lx", sym->name,
+ snprintf(buf, sizeof(buf), "%s+%Lx", sym->name,
addr - sym->start);
else
- snprintf(bf, sizeof(bf), "%#Lx", addr);
+ snprintf(buf, sizeof(buf), "%#Lx", addr);
+ printf(" %-34s |", buf);

- printf("%-28s|%8llu/%-6lu |%8llu/%-6lu|%6lu|%8.3f%%\n",
- bf, (unsigned long long)data->bytes_alloc,
+ printf(" %9llu/%-5lu | %9llu/%-5lu | %6lu | %8lu | %6.3f%%\n",
+ (unsigned long long)data->bytes_alloc,
(unsigned long)data->bytes_alloc / data->hit,
(unsigned long long)data->bytes_req,
(unsigned long)data->bytes_req / data->hit,
(unsigned long)data->hit,
+ (unsigned long)data->pingpong,
fragmentation(data->bytes_req, data->bytes_alloc));

next = rb_next(next);
}

if (n_lines == -1)
- printf(" ... | ... | ... | ... | ... \n");
+ printf(" ... | ... | ... | ... | ... | ... \n");

- printf("%.78s\n", graph_dotted_line);
+ printf("%.102s\n", graph_dotted_line);
}

static void print_summary(void)
@@ -597,12 +648,27 @@ static struct sort_dimension frag_sort_dimension = {
.cmp = frag_cmp,
};

+static int pingpong_cmp(struct alloc_stat *l, struct alloc_stat *r)
+{
+ if (l->pingpong < r->pingpong)
+ return -1;
+ else if (l->pingpong > r->pingpong)
+ return 1;
+ return 0;
+}
+
+static struct sort_dimension pingpong_sort_dimension = {
+ .name = "pingpong",
+ .cmp = pingpong_cmp,
+};
+
static struct sort_dimension *avail_sorts[] = {
&ptr_sort_dimension,
&callsite_sort_dimension,
&hit_sort_dimension,
&bytes_sort_dimension,
&frag_sort_dimension,
+ &pingpong_sort_dimension,
};

#define NUM_AVAIL_SORTS \
@@ -703,7 +769,7 @@ static const struct option kmem_options[] = {
"stat selector, Pass 'alloc' or 'caller'.",
parse_stat_opt),
OPT_CALLBACK('s', "sort", NULL, "key[,key2...]",
- "sort by key(s): ptr, call_site, bytes, hit, frag",
+ "sort by keys: ptr, call_site, bytes, hit, pingpong, frag",
parse_sort_opt),
OPT_CALLBACK('l', "line", NULL, "num",
"show n lins",

2009-11-24 16:57:21

by Li Zefan

[permalink] [raw]
Subject: [tip:perf/core] perf kmem: Add help file

Commit-ID: b23d5767a5818caec8547d0bce1588b02bdecd30
Gitweb: http://git.kernel.org/tip/b23d5767a5818caec8547d0bce1588b02bdecd30
Author: Li Zefan <[email protected]>
AuthorDate: Tue, 24 Nov 2009 13:27:11 +0800
Committer: Ingo Molnar <[email protected]>
CommitDate: Tue, 24 Nov 2009 08:49:51 +0100

perf kmem: Add help file

Add Documentation/perf-kmem.txt

Signed-off-by: Li Zefan <[email protected]>
Acked-by: Pekka Enberg <[email protected]>
Cc: Eduard - Gabriel Munteanu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: [email protected] <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
tools/perf/Documentation/perf-kmem.txt | 44 ++++++++++++++++++++++++++++++++
tools/perf/command-list.txt | 1 +
2 files changed, 45 insertions(+), 0 deletions(-)

diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt
new file mode 100644
index 0000000..44b0ce3
--- /dev/null
+++ b/tools/perf/Documentation/perf-kmem.txt
@@ -0,0 +1,44 @@
+perf-kmem(1)
+==============
+
+NAME
+----
+perf-kmem - Tool to trace/measure kernel memory(slab) properties
+
+SYNOPSIS
+--------
+[verse]
+'perf kmem' {record} [<options>]
+
+DESCRIPTION
+-----------
+There are two variants of perf kmem:
+
+ 'perf kmem record <command>' to record the kmem events
+ of an arbitrary workload.
+
+ 'perf kmem' to report kernel memory statistics.
+
+OPTIONS
+-------
+-i <file>::
+--input=<file>::
+ Select the input file (default: perf.data)
+
+--stat=<caller|alloc>::
+ Select per callsite or per allocation statistics
+
+-s <key[,key2...]>::
+--sort=<key[,key2...]>::
+ Sort the output (default: frag,hit,bytes)
+
+-l <num>::
+--line=<num>::
+ Print n lines only
+
+--raw-ip::
+ Print raw ip instead of symbol
+
+SEE ALSO
+--------
+linkperf:perf-record[1]
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt
index d3a6e18..02b09ea 100644
--- a/tools/perf/command-list.txt
+++ b/tools/perf/command-list.txt
@@ -14,3 +14,4 @@ perf-timechart mainporcelain common
perf-top mainporcelain common
perf-trace mainporcelain common
perf-probe mainporcelain common
+perf-kmem mainporcelain common

2009-11-24 18:49:32

by Frederic Weisbecker

[permalink] [raw]
Subject: Re: [PATCH 0/5] perf kmem: Add more functions and show more statistics

On Tue, Nov 24, 2009 at 11:07:24AM +0100, Ingo Molnar wrote:
>
> * Li Zefan <[email protected]> wrote:
>
> > > 3)
> > >
> > > it doesnt seem to be working on one of my boxes, which has perf and kmem
> > > events as well:
> > >
> > > aldebaran:~/linux/linux/tools/perf> perf kmem record
> > > ^C[ perf record: Woken up 1 times to write data ]
> > > [ perf record: Captured and wrote 0.050 MB perf.data (~2172 samples) ]
> > >
> >
> > Seems no kmem event is recorded. No sure what happened here.
> >
> > Might be that the parameters that perf-kmem passes to perf-record
> > are not properly selected?
> >
> > Do perf-sched and perf-timechart work on this box?
>
> yeah:
>
> aldebaran:~> perf sched record sleep 1
> [ perf record: Woken up 1 times to write data ]
> [ perf record: Captured and wrote 0.017 MB perf.data (~758 samples) ]
> aldebaran:~> perf trace | tail -5
> distccd-20944 [010] 1792.787376: sched_stat_runtime: comm=distccd pid=20944 runtime=11196 [ns] vruntime=696395420043 [ns]
> init-0 [009] 1792.914837: sched_stat_wait: comm=x86_64-linux-gc pid=881 delay=10686 [ns]
> init-0 [009] 1792.915082: sched_stat_sleep: comm=events/9 pid=44 delay=2183651362 [ns]
> as-889 [013] 1793.008008: sched_stat_runtime: comm=as pid=889 runtime=156807 [ns] vruntime=1553569219042 [ns]
> init-0 [004] 1793.154400: sched_stat_wait: comm=events/4 pid=39 delay=12155 [ns]
>
> aldebaran:~> perf kmem record sleep 1
> [ perf record: Woken up 1 times to write data ]
> [ perf record: Captured and wrote 0.078 MB perf.data (~3398 samples) ]
> aldebaran:~> perf trace | tail -5
> aldebaran:~>
>
> the perf.data has mmap and exit events - but no kmem events.
>
> I've attached the config, in case it matters. It runs latest -tip, with
> your latest series applied as well.
>
> Ingo



I think this is a problem external to kmem events. It's about
trace events/perf in general. It looks like we have some losses.
Steve and Arjan have reported similar things.

I'll investigate this way.

Thanks.

2009-11-24 19:46:39

by Frederic Weisbecker

[permalink] [raw]
Subject: [PATCH] perf: Fix bad software/trace event recursion counting

Commit 4ed7c92d68a5387ba5f7030dc76eab03558e27f5
(perf_events: Undo some recursion damage) has introduced a bad
reference counting of the recursion context. putting the context
behaves like getting it, dropping every software/trace events
after the first one in a context.

Signed-off-by: Frederic Weisbecker <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Arjan van de Ven <[email protected]>
Cc: Li Zefan <[email protected]>
Cc: Steven Rostedt <[email protected]>
---
kernel/perf_event.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index accfd7b..35df94e 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -3914,7 +3914,7 @@ void perf_swevent_put_recursion_context(int rctx)
{
struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
barrier();
- cpuctx->recursion[rctx]++;
+ cpuctx->recursion[rctx]--;
put_cpu_var(perf_cpu_context);
}
EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
--
1.6.2.3

2009-11-24 21:44:34

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 0/5] perf kmem: Add more functions and show more statistics


* Li Zefan <[email protected]> wrote:

> So I think some new updates on kernel perf_event break.

yeah, you were right. This commit in latest -tip should fix it:

fe61267: perf_events: Fix bad software/trace event recursion counting

Ingo

2009-11-24 20:38:41

by Frederic Weisbecker

[permalink] [raw]
Subject: [tip:perf/core] perf_events: Fix bad software/trace event recursion counting

Commit-ID: fe6126722718e51fba4879517c11ac12d9775bcc
Gitweb: http://git.kernel.org/tip/fe6126722718e51fba4879517c11ac12d9775bcc
Author: Frederic Weisbecker <[email protected]>
AuthorDate: Tue, 24 Nov 2009 20:38:22 +0100
Committer: Ingo Molnar <[email protected]>
CommitDate: Tue, 24 Nov 2009 21:34:00 +0100

perf_events: Fix bad software/trace event recursion counting

Commit 4ed7c92d68a5387ba5f7030dc76eab03558e27f5
(perf_events: Undo some recursion damage) has introduced bad
reference counting of the recursion context. Putting the context
behaves like getting it, dropping every software/trace event
after the first one in a context.

Signed-off-by: Frederic Weisbecker <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Arjan van de Ven <[email protected]>
Cc: Li Zefan <[email protected]>
Cc: Steven Rostedt <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
kernel/perf_event.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index accfd7b..35df94e 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -3914,7 +3914,7 @@ void perf_swevent_put_recursion_context(int rctx)
{
struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
barrier();
- cpuctx->recursion[rctx]++;
+ cpuctx->recursion[rctx]--;
put_cpu_var(perf_cpu_context);
}
EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);

2009-11-24 20:49:12

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH] perf: Fix bad software/trace event recursion counting

On Tue, 2009-11-24 at 20:38 +0100, Frederic Weisbecker wrote:

> diff --git a/kernel/perf_event.c b/kernel/perf_event.c
> index accfd7b..35df94e 100644
> --- a/kernel/perf_event.c
> +++ b/kernel/perf_event.c
> @@ -3914,7 +3914,7 @@ void perf_swevent_put_recursion_context(int rctx)
> {
> struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
> barrier();
> - cpuctx->recursion[rctx]++;
> + cpuctx->recursion[rctx]--;
> put_cpu_var(perf_cpu_context);
> }

D'0h!

2009-11-24 22:35:04

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 0/5] perf kmem: Add more functions and show more statistics


* Ingo Molnar <[email protected]> wrote:

>
> * Li Zefan <[email protected]> wrote:
>
> > So I think some new updates on kernel perf_event break.
>
> yeah, you were right. This commit in latest -tip should fix it:
>
> fe61267: perf_events: Fix bad software/trace event recursion counting

i tested perf kmem and it works fine now:

aldebaran:~> perf kmem

SUMMARY
=======
Total bytes requested: 153166032
Total bytes allocated: 188544080
Total bytes wasted on internal fragmentation: 35378048
Internal fragmentation: 18.763807%
Cross CPU allocations: 61680/451425

Ingo