2021-03-31 10:44:57

by Nicholas Fraser

[permalink] [raw]
Subject: [PATCH] perf data: Add JSON export

From ddcfd620e7cad4100d0076090c4b39dba8aeead3 Mon Sep 17 00:00:00 2001
From: Nicholas Fraser <[email protected]>
Date: Wed, 31 Mar 2021 06:10:00 -0400
Subject: [PATCH] perf data: Add JSON export

This adds a feature to export perf data to JSON. It uses a minimal
inline JSON encoding, no external dependencies. Currently it only
outputs some headers and sample metadata but it's easily extensible.

Use it like this:

perf data convert --to-json out.json

Signed-off-by: Nicholas Fraser <[email protected]>
---
tools/perf/Documentation/perf-data.txt | 5 +-
tools/perf/builtin-data.c | 26 ++-
tools/perf/util/Build | 1 +
tools/perf/util/data-convert-bt.c | 2 +-
tools/perf/util/data-convert-bt.h | 11 -
tools/perf/util/data-convert-json.c | 310 +++++++++++++++++++++++++
tools/perf/util/data-convert.h | 10 +
7 files changed, 344 insertions(+), 21 deletions(-)
delete mode 100644 tools/perf/util/data-convert-bt.h
create mode 100644 tools/perf/util/data-convert-json.c

diff --git a/tools/perf/Documentation/perf-data.txt b/tools/perf/Documentation/perf-data.txt
index 726b9bc9e1a7..417bf17e265c 100644
--- a/tools/perf/Documentation/perf-data.txt
+++ b/tools/perf/Documentation/perf-data.txt
@@ -17,7 +17,7 @@ Data file related processing.
COMMANDS
--------
convert::
- Converts perf data file into another format (only CTF [1] format is support by now).
+ Converts perf data file into another format.
It's possible to set data-convert debug variable to get debug messages from conversion,
like:
perf --debug data-convert data convert ...
@@ -27,6 +27,9 @@ OPTIONS for 'convert'
--to-ctf::
Triggers the CTF conversion, specify the path of CTF data directory.

+--to-json::
+ Triggers JSON conversion. Specify the JSON filename to output.
+
--tod::
Convert time to wall clock time.

diff --git a/tools/perf/builtin-data.c b/tools/perf/builtin-data.c
index 8d23b8d6ee8e..15ca23675ef0 100644
--- a/tools/perf/builtin-data.c
+++ b/tools/perf/builtin-data.c
@@ -7,7 +7,6 @@
#include "debug.h"
#include <subcmd/parse-options.h>
#include "data-convert.h"
-#include "data-convert-bt.h"

typedef int (*data_cmd_fn_t)(int argc, const char **argv);

@@ -55,7 +54,8 @@ static const char * const data_convert_usage[] = {

static int cmd_data_convert(int argc, const char **argv)
{
- const char *to_ctf = NULL;
+ const char *to_json = NULL;
+ const char *to_ctf = NULL;
struct perf_data_convert_opts opts = {
.force = false,
.all = false,
@@ -63,6 +63,7 @@ static int cmd_data_convert(int argc, const char **argv)
const struct option options[] = {
OPT_INCR('v', "verbose", &verbose, "be more verbose"),
OPT_STRING('i', "input", &input_name, "file", "input file name"),
+ OPT_STRING(0, "to-json", &to_json, NULL, "Convert to JSON format"),
#ifdef HAVE_LIBBABELTRACE_SUPPORT
OPT_STRING(0, "to-ctf", &to_ctf, NULL, "Convert to CTF format"),
OPT_BOOLEAN(0, "tod", &opts.tod, "Convert time to wall clock time"),
@@ -72,11 +73,6 @@ static int cmd_data_convert(int argc, const char **argv)
OPT_END()
};

-#ifndef HAVE_LIBBABELTRACE_SUPPORT
- pr_err("No conversion support compiled in. perf should be compiled with environment variables LIBBABELTRACE=1 and LIBBABELTRACE_DIR=/path/to/libbabeltrace/\n");
- return -1;
-#endif
-
argc = parse_options(argc, argv, options,
data_convert_usage, 0);
if (argc) {
@@ -84,11 +80,25 @@ static int cmd_data_convert(int argc, const char **argv)
return -1;
}

+ if (to_json && to_ctf) {
+ pr_err("You cannot specify both --to-ctf and --to-json.\n");
+ return -1;
+ }
+ if (!to_json && !to_ctf) {
+ pr_err("You must specify one of --to-ctf or --to-json.\n");
+ return -1;
+ }
+
+ if (to_json)
+ return bt_convert__perf2json(input_name, to_json, &opts);
+
if (to_ctf) {
#ifdef HAVE_LIBBABELTRACE_SUPPORT
return bt_convert__perf2ctf(input_name, to_ctf, &opts);
#else
- pr_err("The libbabeltrace support is not compiled in.\n");
+ pr_err("The libbabeltrace support is not compiled in. perf should be "
+ "compiled with environment variables LIBBABELTRACE=1 and "
+ "LIBBABELTRACE_DIR=/path/to/libbabeltrace/\n");
return -1;
#endif
}
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index e2563d0154eb..de9ac182b25a 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -163,6 +163,7 @@ perf-$(CONFIG_LIBUNWIND_X86) += libunwind/x86_32.o
perf-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o

perf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o
+perf-y += data-convert-json.o

perf-y += scripting-engines/

diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 27c5fef9ad54..803102207a8b 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -21,7 +21,7 @@
#include <babeltrace/ctf/events.h>
#include <traceevent/event-parse.h>
#include "asm/bug.h"
-#include "data-convert-bt.h"
+#include "data-convert.h"
#include "session.h"
#include "debug.h"
#include "tool.h"
diff --git a/tools/perf/util/data-convert-bt.h b/tools/perf/util/data-convert-bt.h
deleted file mode 100644
index 821674d63c4e..000000000000
--- a/tools/perf/util/data-convert-bt.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __DATA_CONVERT_BT_H
-#define __DATA_CONVERT_BT_H
-#include "data-convert.h"
-#ifdef HAVE_LIBBABELTRACE_SUPPORT
-
-int bt_convert__perf2ctf(const char *input_name, const char *to_ctf,
- struct perf_data_convert_opts *opts);
-
-#endif /* HAVE_LIBBABELTRACE_SUPPORT */
-#endif /* __DATA_CONVERT_BT_H */
diff --git a/tools/perf/util/data-convert-json.c b/tools/perf/util/data-convert-json.c
new file mode 100644
index 000000000000..73f22c1b28ce
--- /dev/null
+++ b/tools/perf/util/data-convert-json.c
@@ -0,0 +1,310 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * JSON export.
+ *
+ * Copyright (C) 2021, CodeWeavers Inc. <[email protected]>
+ */
+
+#include "data-convert.h"
+
+#include <fcntl.h>
+#include <inttypes.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "linux/compiler.h"
+#include "linux/err.h"
+#include "util/auxtrace.h"
+#include "util/debug.h"
+#include "util/dso.h"
+#include "util/event.h"
+#include "util/evsel.h"
+#include "util/header.h"
+#include "util/map.h"
+#include "util/session.h"
+#include "util/symbol.h"
+#include "util/thread.h"
+#include "util/tool.h"
+
+struct convert_json {
+ struct perf_tool tool;
+ FILE *out;
+ bool first;
+};
+
+static void output_json_string(FILE *out, const char *s)
+{
+ fputc('"', out);
+ while (*s) {
+ switch (*s) {
+
+ // required escapes with special forms as per RFC 8259
+ case '"': fputs("\\\"", out); break;
+ case '\\': fputs("\\\\", out); break;
+ case '\b': fputs("\\b", out); break;
+ case '\f': fputs("\\f", out); break;
+ case '\n': fputs("\\n", out); break;
+ case '\r': fputs("\\r", out); break;
+ case '\t': fputs("\\t", out); break;
+
+ default:
+ // all other control characters must be escaped by hex code
+ if (*s <= 0x1f)
+ fprintf(out, "\\u%04x", *s);
+ else
+ fputc(*s, out);
+ break;
+ }
+
+ ++s;
+ }
+ fputc('"', out);
+}
+
+static void output_sample_callchain_entry(struct perf_tool *tool,
+ u64 ip, struct addr_location *al)
+{
+ struct convert_json *c = container_of(tool, struct convert_json, tool);
+ FILE *out = c->out;
+
+ fprintf(out, "\n\t\t\t\t{");
+ fprintf(out, "\n\t\t\t\t\t\"ip\": \"0x%" PRIx64 "\"", ip);
+
+ if (al && al->sym && al->sym->name && strlen(al->sym->name) > 0) {
+ fprintf(out, ",\n\t\t\t\t\t\"symbol\": ");
+ output_json_string(out, al->sym->name);
+
+ if (al->map && al->map->dso) {
+ const char *dso = al->map->dso->short_name;
+
+ if (dso && strlen(dso) > 0) {
+ fprintf(out, ",\n\t\t\t\t\t\"dso\": ");
+ output_json_string(out, dso);
+ }
+ }
+ }
+
+ fprintf(out, "\n\t\t\t\t}");
+}
+
+static int process_sample_event(struct perf_tool *tool,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample,
+ struct evsel *evsel __maybe_unused,
+ struct machine *machine)
+{
+ struct convert_json *c = container_of(tool, struct convert_json, tool);
+ FILE *out = c->out;
+ struct addr_location al, tal;
+ u8 cpumode = PERF_RECORD_MISC_USER;
+
+ if (machine__resolve(machine, &al, sample) < 0) {
+ pr_err("Sample resolution failed!\n");
+ return -1;
+ }
+
+ if (c->first)
+ c->first = false;
+ else
+ fputc(',', out);
+ fprintf(out, "\n\t\t{");
+
+ fprintf(out, "\n\t\t\t\"timestamp\": %" PRIi64, sample->time);
+ fprintf(out, ",\n\t\t\t\"pid\": %i", al.thread->pid_);
+ fprintf(out, ",\n\t\t\t\"tid\": %i", al.thread->tid);
+
+ if (al.thread->cpu >= 0)
+ fprintf(out, ",\n\t\t\t\"cpu\": %i", al.thread->cpu);
+
+ fprintf(out, ",\n\t\t\t\"comm\": ");
+ output_json_string(out, thread__comm_str(al.thread));
+
+ fprintf(out, ",\n\t\t\t\"callchain\": [");
+ if (sample->callchain) {
+ unsigned int i;
+ bool ok;
+ bool first_callchain = true;
+
+ for (i = 0; i < sample->callchain->nr; ++i) {
+ u64 ip = sample->callchain->ips[i];
+
+ if (ip >= PERF_CONTEXT_MAX) {
+ switch (ip) {
+ case PERF_CONTEXT_HV:
+ cpumode = PERF_RECORD_MISC_HYPERVISOR;
+ break;
+ case PERF_CONTEXT_KERNEL:
+ cpumode = PERF_RECORD_MISC_KERNEL;
+ break;
+ case PERF_CONTEXT_USER:
+ cpumode = PERF_RECORD_MISC_USER;
+ break;
+ default:
+ pr_debug("invalid callchain context: %"
+ PRId64 "\n", (s64) ip);
+ break;
+ }
+ continue;
+ }
+
+ if (first_callchain)
+ first_callchain = false;
+ else
+ fputc(',', out);
+
+ ok = thread__find_symbol(al.thread, cpumode, ip, &tal);
+ output_sample_callchain_entry(tool, ip, ok ? &tal : NULL);
+ }
+ } else {
+ output_sample_callchain_entry(tool, sample->ip, &al);
+ }
+ fprintf(out, "\n\t\t\t]");
+
+ fprintf(out, "\n\t\t}");
+ return 0;
+}
+
+static void output_headers(struct perf_session *session, struct convert_json *c)
+{
+ struct stat st;
+ struct perf_header *header = &session->header;
+ int ret;
+ int fd = perf_data__fd(session->data);
+ int i;
+ bool first;
+
+ fprintf(c->out, "\n\t\t\t\"header-version\": %u", header->version);
+
+ ret = fstat(fd, &st);
+ if (ret >= 0) {
+ time_t stctime = st.st_mtime;
+ char buf[256];
+
+ strftime(buf, sizeof(buf), "%FT%TZ", gmtime(&stctime));
+ fprintf(c->out, ",\n\t\t\t\"captured-on\": \"%s\"", buf);
+ } else {
+ pr_debug("Failed to get mtime of source file, not writing \"captured-on\"");
+ }
+
+ fprintf(c->out, ",\n\t\t\t\"data-offset\": %" PRIu64, header->data_offset);
+ fprintf(c->out, ",\n\t\t\t\"data-size\": %" PRIu64, header->data_size);
+ fprintf(c->out, ",\n\t\t\t\"feat-offset\": %" PRIu64, header->feat_offset);
+
+ fputs(",\n\t\t\t\"hostname\": ", c->out);
+ output_json_string(c->out, header->env.hostname);
+ fputs(",\n\t\t\t\"os-release\": ", c->out);
+ output_json_string(c->out, header->env.os_release);
+ fputs(",\n\t\t\t\"arch\": ", c->out);
+ output_json_string(c->out, header->env.arch);
+
+ fputs(",\n\t\t\t\"cpu-desc\": ", c->out);
+ output_json_string(c->out, header->env.cpu_desc);
+ fputs(",\n\t\t\t\"cpuid\": ", c->out);
+ output_json_string(c->out, header->env.cpuid);
+ fprintf(c->out, ",\n\t\t\t\"nrcpus-online\": %u", header->env.nr_cpus_online);
+ fprintf(c->out, ",\n\t\t\t\"nrcpus-avail\": %u", header->env.nr_cpus_avail);
+
+ fputs(",\n\t\t\t\"perf-version\": ", c->out);
+ output_json_string(c->out, header->env.version);
+
+ fputs(",\n\t\t\t\"cmdline\": [", c->out);
+ first = true;
+ for (i = 0; i < header->env.nr_cmdline; i++) {
+ if (first)
+ first = false;
+ else
+ fputc(',', c->out);
+ fputs("\n\t\t\t\t", c->out);
+ output_json_string(c->out, header->env.cmdline_argv[i]);
+ }
+ fputs("\n\t\t\t]", c->out);
+}
+
+int bt_convert__perf2json(const char *input_name, const char *output_name,
+ struct perf_data_convert_opts *opts __maybe_unused)
+{
+ struct perf_session *session;
+ int fd;
+
+ struct convert_json c = {
+ .tool = {
+ .sample = process_sample_event,
+ .mmap = perf_event__process_mmap,
+ .mmap2 = perf_event__process_mmap2,
+ .comm = perf_event__process_comm,
+ .namespaces = perf_event__process_namespaces,
+ .cgroup = perf_event__process_cgroup,
+ .exit = perf_event__process_exit,
+ .fork = perf_event__process_fork,
+ .lost = perf_event__process_lost,
+ .tracing_data = perf_event__process_tracing_data,
+ .build_id = perf_event__process_build_id,
+ .id_index = perf_event__process_id_index,
+ .auxtrace_info = perf_event__process_auxtrace_info,
+ .auxtrace = perf_event__process_auxtrace,
+ .event_update = perf_event__process_event_update,
+ .ordered_events = true,
+ .ordering_requires_timestamps = true,
+ },
+ .first = true,
+ };
+
+ struct perf_data data = {
+ .mode = PERF_DATA_MODE_READ,
+ .path = input_name,
+ .force = opts->force,
+ };
+
+ if (opts->all) {
+ pr_err("--all is currently unsupported for JSON output.\n");
+ return -1;
+ }
+ if (opts->tod) {
+ pr_err("--tod is currently unsupported for JSON output.\n");
+ return -1;
+ }
+
+ fd = open(output_name, O_CREAT | O_WRONLY | (opts->force ? 0 : O_EXCL), 0666);
+ if (fd == -1) {
+ if (errno == EEXIST)
+ pr_err("Output file exists. Use --force to overwrite it.\n");
+ else
+ pr_err("Error opening output file!\n");
+ return -1;
+ }
+
+ c.out = fdopen(fd, "w");
+ if (!c.out) {
+ fprintf(stderr, "Error opening output file!\n");
+ return -1;
+ }
+
+ session = perf_session__new(&data, false, &c.tool);
+ if (IS_ERR(session)) {
+ fprintf(stderr, "Error creating perf session!\n");
+ return -1;
+ }
+
+ if (symbol__init(&session->header.env) < 0) {
+ fprintf(stderr, "Symbol init error!\n");
+ return -1;
+ }
+
+ // Version number for future-proofing. Most additions should be able to be
+ // done in a backwards-compatible way so this should only need to be bumped
+ // if some major breaking change must be made.
+ fprintf(c.out, "{\n\t\"linux-perf-json-version\": 1,");
+
+ // Output headers
+ fprintf(c.out, "\n\t\"headers\": {");
+ output_headers(session, &c);
+ fprintf(c.out, "\n\t},");
+
+ // Output samples
+ fprintf(c.out, "\n\t\"samples\": [");
+ perf_session__process_events(session);
+ fprintf(c.out, "\n\t]\n}\n");
+
+ perf_session__delete(session);
+ return 0;
+}
diff --git a/tools/perf/util/data-convert.h b/tools/perf/util/data-convert.h
index feab5f114e37..1b4c5f598415 100644
--- a/tools/perf/util/data-convert.h
+++ b/tools/perf/util/data-convert.h
@@ -2,10 +2,20 @@
#ifndef __DATA_CONVERT_H
#define __DATA_CONVERT_H

+#include <stdbool.h>
+
struct perf_data_convert_opts {
bool force;
bool all;
bool tod;
};

+#ifdef HAVE_LIBBABELTRACE_SUPPORT
+int bt_convert__perf2ctf(const char *input_name, const char *to_ctf,
+ struct perf_data_convert_opts *opts);
+#endif /* HAVE_LIBBABELTRACE_SUPPORT */
+
+int bt_convert__perf2json(const char *input_name, const char *to_ctf,
+ struct perf_data_convert_opts *opts);
+
#endif /* __DATA_CONVERT_H */
--
2.31.0



2021-04-01 18:44:08

by Jiri Olsa

[permalink] [raw]
Subject: Re: [PATCH] perf data: Add JSON export

On Wed, Mar 31, 2021 at 06:42:48AM -0400, Nicholas Fraser wrote:
> From ddcfd620e7cad4100d0076090c4b39dba8aeead3 Mon Sep 17 00:00:00 2001
> From: Nicholas Fraser <[email protected]>
> Date: Wed, 31 Mar 2021 06:10:00 -0400
> Subject: [PATCH] perf data: Add JSON export

no need to add headers again in here

>
> This adds a feature to export perf data to JSON. It uses a minimal
> inline JSON encoding, no external dependencies. Currently it only
> outputs some headers and sample metadata but it's easily extensible.
>
> Use it like this:
>
> perf data convert --to-json out.json

please add similar output summary message we have for CTF conversion:

[ perf data convert: Converted 'perf.data' into CTF data 'data' ]
[ perf data convert: Converted and wrote 0.000 MB (10 samples) ]

also I will not push hard for test, becase we don't have any for CTF ;-)
but if you could think of any, that'd be great

> +
> +static void output_headers(struct perf_session *session, struct convert_json *c)
> +{
> + struct stat st;
> + struct perf_header *header = &session->header;
> + int ret;
> + int fd = perf_data__fd(session->data);
> + int i;
> + bool first;
> +
> + fprintf(c->out, "\n\t\t\t\"header-version\": %u", header->version);
> +
> + ret = fstat(fd, &st);
> + if (ret >= 0) {
> + time_t stctime = st.st_mtime;
> + char buf[256];
> +
> + strftime(buf, sizeof(buf), "%FT%TZ", gmtime(&stctime));
> + fprintf(c->out, ",\n\t\t\t\"captured-on\": \"%s\"", buf);
> + } else {
> + pr_debug("Failed to get mtime of source file, not writing \"captured-on\"");
> + }
> +
> + fprintf(c->out, ",\n\t\t\t\"data-offset\": %" PRIu64, header->data_offset);
> + fprintf(c->out, ",\n\t\t\t\"data-size\": %" PRIu64, header->data_size);
> + fprintf(c->out, ",\n\t\t\t\"feat-offset\": %" PRIu64, header->feat_offset);

I was wondering how to make this \t mess more readable,
how about you define function like output_json:

output_json(FILE, level, field, format, ...);

and use it:

output_json(c->out, 3, "data-offset", "PRIu64", header->data_offset);
output_json(c->out, 3, "data-size", "PRIu64", header->data_size);
output_json(c->out, 3, "feat-offset", PRIu64, header->feat_offset);

similar way as we do for pr_debug -> eprintf

SNIP

> +
> + fd = open(output_name, O_CREAT | O_WRONLY | (opts->force ? 0 : O_EXCL), 0666);
> + if (fd == -1) {
> + if (errno == EEXIST)
> + pr_err("Output file exists. Use --force to overwrite it.\n");
> + else
> + pr_err("Error opening output file!\n");
> + return -1;
> + }
> +
> + c.out = fdopen(fd, "w");
> + if (!c.out) {
> + fprintf(stderr, "Error opening output file!\n");
> + return -1;
> + }
> +
> + session = perf_session__new(&data, false, &c.tool);
> + if (IS_ERR(session)) {
> + fprintf(stderr, "Error creating perf session!\n");
> + return -1;

here we should close c.out and call perf_session__delete,
we normaly do goto to the end of the function in this case

> + }
> +
> + if (symbol__init(&session->header.env) < 0) {
> + fprintf(stderr, "Symbol init error!\n");
> + return -1;
> + }
> +
> + // Version number for future-proofing. Most additions should be able to be
> + // done in a backwards-compatible way so this should only need to be bumped
> + // if some major breaking change must be made.
> + fprintf(c.out, "{\n\t\"linux-perf-json-version\": 1,");
> +
> + // Output headers
> + fprintf(c.out, "\n\t\"headers\": {");
> + output_headers(session, &c);
> + fprintf(c.out, "\n\t},");
> +
> + // Output samples
> + fprintf(c.out, "\n\t\"samples\": [");
> + perf_session__process_events(session);
> + fprintf(c.out, "\n\t]\n}\n");
> +

you need to close c.out

> + perf_session__delete(session);
> + return 0;
> +}
> diff --git a/tools/perf/util/data-convert.h b/tools/perf/util/data-convert.h
> index feab5f114e37..1b4c5f598415 100644
> --- a/tools/perf/util/data-convert.h
> +++ b/tools/perf/util/data-convert.h
> @@ -2,10 +2,20 @@
> #ifndef __DATA_CONVERT_H
> #define __DATA_CONVERT_H
>
> +#include <stdbool.h>
> +
> struct perf_data_convert_opts {
> bool force;
> bool all;
> bool tod;
> };
>
> +#ifdef HAVE_LIBBABELTRACE_SUPPORT
> +int bt_convert__perf2ctf(const char *input_name, const char *to_ctf,
> + struct perf_data_convert_opts *opts);
> +#endif /* HAVE_LIBBABELTRACE_SUPPORT */
> +
> +int bt_convert__perf2json(const char *input_name, const char *to_ctf,
> + struct perf_data_convert_opts *opts);
> +
> #endif /* __DATA_CONVERT_H */

great, thanks for this
jirka

2021-04-06 18:44:53

by Nicholas Fraser

[permalink] [raw]
Subject: Re: [PATCH] perf data: Add JSON export

Hi Jiri,

Thanks again for the review. I've fixed the issues you mentioned. Some notes
below:


On 2021-04-01 8:15 a.m., Jiri Olsa wrote:
> I recall you did not add support for walltime clock,
> don't you need it to sync with other events?

Not necessarily. As long as the perf recording and the GPU trace are made from
the same clock, the events should line up; the real time doesn't matter. I've
added it anyway though since it's good to have. It now outputs "clockid",
"clock-time" and "real-time" in the headers if clock info was included in the
recording.


On 2021-04-01 9:18 a.m., Jiri Olsa wrote:
> also I will not push hard for test, becase we don't have any for CTF ;-)
> but if you could think of any, that'd be great

I'm not sure what kind of tests would be useful. We could include a small
perf.data file and its JSON output and verify that they match. This is probably
pointless because almost any change to the JSON code would be expected to
change the output so we'd just be resetting the test on each change.


> I was wondering how to make this \t mess more readable,
> how about you define function like output_json:
>
> output_json(FILE, level, field, format, ...);
>
> and use it:
>
> output_json(c->out, 3, "data-offset", "PRIu64", header->data_offset);
> output_json(c->out, 3, "data-size", "PRIu64", header->data_size);
> output_json(c->out, 3, "feat-offset", PRIu64, header->feat_offset);
>
> similar way as we do for pr_debug -> eprintf

I've cleaned up the output with some additional helper functions. This is
essentially turning into its own mini JSON encoder which I was hoping to avoid
but I suppose it's necessary to make the code maintainable.


I'll be sending a new patch shortly.

Nick

2021-04-06 18:49:10

by Nicholas Fraser

[permalink] [raw]
Subject: [PATCH] perf data: Add JSON export

This adds a feature to export perf data to JSON. It uses a minimal
inline JSON encoding, no external dependencies. Currently it only
outputs some headers and sample metadata but it's easily extensible.

Use it like this:

perf data convert --to-json out.json

Signed-off-by: Nicholas Fraser <[email protected]>
---
tools/perf/Documentation/perf-data.txt | 5 +-
tools/perf/builtin-data.c | 26 +-
tools/perf/util/Build | 1 +
tools/perf/util/data-convert-bt.c | 2 +-
tools/perf/util/data-convert-bt.h | 11 -
tools/perf/util/data-convert-json.c | 384 +++++++++++++++++++++++++
tools/perf/util/data-convert.h | 10 +
7 files changed, 418 insertions(+), 21 deletions(-)
delete mode 100644 tools/perf/util/data-convert-bt.h
create mode 100644 tools/perf/util/data-convert-json.c

diff --git a/tools/perf/Documentation/perf-data.txt b/tools/perf/Documentation/perf-data.txt
index 726b9bc9e1a7..417bf17e265c 100644
--- a/tools/perf/Documentation/perf-data.txt
+++ b/tools/perf/Documentation/perf-data.txt
@@ -17,7 +17,7 @@ Data file related processing.
COMMANDS
--------
convert::
- Converts perf data file into another format (only CTF [1] format is support by now).
+ Converts perf data file into another format.
It's possible to set data-convert debug variable to get debug messages from conversion,
like:
perf --debug data-convert data convert ...
@@ -27,6 +27,9 @@ OPTIONS for 'convert'
--to-ctf::
Triggers the CTF conversion, specify the path of CTF data directory.

+--to-json::
+ Triggers JSON conversion. Specify the JSON filename to output.
+
--tod::
Convert time to wall clock time.

diff --git a/tools/perf/builtin-data.c b/tools/perf/builtin-data.c
index 8d23b8d6ee8e..15ca23675ef0 100644
--- a/tools/perf/builtin-data.c
+++ b/tools/perf/builtin-data.c
@@ -7,7 +7,6 @@
#include "debug.h"
#include <subcmd/parse-options.h>
#include "data-convert.h"
-#include "data-convert-bt.h"

typedef int (*data_cmd_fn_t)(int argc, const char **argv);

@@ -55,7 +54,8 @@ static const char * const data_convert_usage[] = {

static int cmd_data_convert(int argc, const char **argv)
{
- const char *to_ctf = NULL;
+ const char *to_json = NULL;
+ const char *to_ctf = NULL;
struct perf_data_convert_opts opts = {
.force = false,
.all = false,
@@ -63,6 +63,7 @@ static int cmd_data_convert(int argc, const char **argv)
const struct option options[] = {
OPT_INCR('v', "verbose", &verbose, "be more verbose"),
OPT_STRING('i', "input", &input_name, "file", "input file name"),
+ OPT_STRING(0, "to-json", &to_json, NULL, "Convert to JSON format"),
#ifdef HAVE_LIBBABELTRACE_SUPPORT
OPT_STRING(0, "to-ctf", &to_ctf, NULL, "Convert to CTF format"),
OPT_BOOLEAN(0, "tod", &opts.tod, "Convert time to wall clock time"),
@@ -72,11 +73,6 @@ static int cmd_data_convert(int argc, const char **argv)
OPT_END()
};

-#ifndef HAVE_LIBBABELTRACE_SUPPORT
- pr_err("No conversion support compiled in. perf should be compiled with environment variables LIBBABELTRACE=1 and LIBBABELTRACE_DIR=/path/to/libbabeltrace/\n");
- return -1;
-#endif
-
argc = parse_options(argc, argv, options,
data_convert_usage, 0);
if (argc) {
@@ -84,11 +80,25 @@ static int cmd_data_convert(int argc, const char **argv)
return -1;
}

+ if (to_json && to_ctf) {
+ pr_err("You cannot specify both --to-ctf and --to-json.\n");
+ return -1;
+ }
+ if (!to_json && !to_ctf) {
+ pr_err("You must specify one of --to-ctf or --to-json.\n");
+ return -1;
+ }
+
+ if (to_json)
+ return bt_convert__perf2json(input_name, to_json, &opts);
+
if (to_ctf) {
#ifdef HAVE_LIBBABELTRACE_SUPPORT
return bt_convert__perf2ctf(input_name, to_ctf, &opts);
#else
- pr_err("The libbabeltrace support is not compiled in.\n");
+ pr_err("The libbabeltrace support is not compiled in. perf should be "
+ "compiled with environment variables LIBBABELTRACE=1 and "
+ "LIBBABELTRACE_DIR=/path/to/libbabeltrace/\n");
return -1;
#endif
}
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index e2563d0154eb..de9ac182b25a 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -163,6 +163,7 @@ perf-$(CONFIG_LIBUNWIND_X86) += libunwind/x86_32.o
perf-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o

perf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o
+perf-y += data-convert-json.o

perf-y += scripting-engines/

diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 27c5fef9ad54..803102207a8b 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -21,7 +21,7 @@
#include <babeltrace/ctf/events.h>
#include <traceevent/event-parse.h>
#include "asm/bug.h"
-#include "data-convert-bt.h"
+#include "data-convert.h"
#include "session.h"
#include "debug.h"
#include "tool.h"
diff --git a/tools/perf/util/data-convert-bt.h b/tools/perf/util/data-convert-bt.h
deleted file mode 100644
index 821674d63c4e..000000000000
--- a/tools/perf/util/data-convert-bt.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __DATA_CONVERT_BT_H
-#define __DATA_CONVERT_BT_H
-#include "data-convert.h"
-#ifdef HAVE_LIBBABELTRACE_SUPPORT
-
-int bt_convert__perf2ctf(const char *input_name, const char *to_ctf,
- struct perf_data_convert_opts *opts);
-
-#endif /* HAVE_LIBBABELTRACE_SUPPORT */
-#endif /* __DATA_CONVERT_BT_H */
diff --git a/tools/perf/util/data-convert-json.c b/tools/perf/util/data-convert-json.c
new file mode 100644
index 000000000000..b57c48d35503
--- /dev/null
+++ b/tools/perf/util/data-convert-json.c
@@ -0,0 +1,384 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * JSON export.
+ *
+ * Copyright (C) 2021, CodeWeavers Inc. <[email protected]>
+ */
+
+#include "data-convert.h"
+
+#include <fcntl.h>
+#include <inttypes.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "linux/compiler.h"
+#include "linux/err.h"
+#include "util/auxtrace.h"
+#include "util/debug.h"
+#include "util/dso.h"
+#include "util/event.h"
+#include "util/evsel.h"
+#include "util/evlist.h"
+#include "util/header.h"
+#include "util/map.h"
+#include "util/session.h"
+#include "util/symbol.h"
+#include "util/thread.h"
+#include "util/tool.h"
+
+struct convert_json {
+ struct perf_tool tool;
+ FILE *out;
+ bool first;
+ u64 events_count;
+};
+
+// Outputs a JSON-encoded string surrounded by quotes with characters escaped.
+static void output_json_string(FILE *out, const char *s)
+{
+ fputc('"', out);
+ while (*s) {
+ switch (*s) {
+
+ // required escapes with special forms as per RFC 8259
+ case '"': fputs("\\\"", out); break;
+ case '\\': fputs("\\\\", out); break;
+ case '\b': fputs("\\b", out); break;
+ case '\f': fputs("\\f", out); break;
+ case '\n': fputs("\\n", out); break;
+ case '\r': fputs("\\r", out); break;
+ case '\t': fputs("\\t", out); break;
+
+ default:
+ // all other control characters must be escaped by hex code
+ if (*s <= 0x1f)
+ fprintf(out, "\\u%04x", *s);
+ else
+ fputc(*s, out);
+ break;
+ }
+
+ ++s;
+ }
+ fputc('"', out);
+}
+
+// Outputs an optional comma, newline and indentation to delimit a new value
+// from the previous one in a JSON object or array.
+static void output_json_delimiters(FILE *out, bool comma, int depth)
+{
+ int i;
+
+ if (comma)
+ fputc(',', out);
+ fputc('\n', out);
+ for (i = 0; i < depth; ++i)
+ fputc('\t', out);
+}
+
+// Outputs a printf format string (with delimiter) as a JSON value.
+__printf(4, 5)
+static void output_json_format(FILE *out, bool comma, int depth, const char *format, ...)
+{
+ va_list args;
+
+ output_json_delimiters(out, comma, depth);
+ va_start(args, format);
+ vfprintf(out, format, args);
+ va_end(args);
+}
+
+// Outputs a JSON key-value pair where the value is a string.
+static void output_json_key_string(FILE *out, bool comma, int depth,
+ const char *key, const char *value)
+{
+ output_json_delimiters(out, comma, depth);
+ output_json_string(out, key);
+ fputs(": ", out);
+ output_json_string(out, value);
+}
+
+// Outputs a JSON key-value pair where the value is a printf format string.
+__(printf, 5, 6)
+static void output_json_key_format(FILE *out, bool comma, int depth,
+ const char *key, const char *format, ...)
+{
+ va_list args;
+
+ output_json_delimiters(out, comma, depth);
+ output_json_string(out, key);
+ fputs(": ", out);
+ va_start(args, format);
+ vfprintf(out, format, args);
+ va_end(args);
+}
+
+static void output_sample_callchain_entry(struct perf_tool *tool,
+ u64 ip, struct addr_location *al)
+{
+ struct convert_json *c = container_of(tool, struct convert_json, tool);
+ FILE *out = c->out;
+
+ output_json_format(out, false, 4, "{");
+ output_json_key_format(out, false, 5, "ip", "\"0x%" PRIx64 "\"", ip);
+
+ if (al && al->sym && al->sym->name && strlen(al->sym->name) > 0) {
+ fputc(',', out);
+ output_json_key_string(out, false, 5, "symbol", al->sym->name);
+
+ if (al->map && al->map->dso) {
+ const char *dso = al->map->dso->short_name;
+
+ if (dso && strlen(dso) > 0) {
+ fputc(',', out);
+ output_json_key_string(out, false, 5, "dso", dso);
+ }
+ }
+ }
+
+ output_json_format(out, false, 4, "}");
+}
+
+static int process_sample_event(struct perf_tool *tool,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample,
+ struct evsel *evsel __maybe_unused,
+ struct machine *machine)
+{
+ struct convert_json *c = container_of(tool, struct convert_json, tool);
+ FILE *out = c->out;
+ struct addr_location al, tal;
+ u8 cpumode = PERF_RECORD_MISC_USER;
+
+ if (machine__resolve(machine, &al, sample) < 0) {
+ pr_err("Sample resolution failed!\n");
+ return -1;
+ }
+
+ ++c->events_count;
+
+ if (c->first)
+ c->first = false;
+ else
+ fputc(',', out);
+ output_json_format(out, false, 2, "{");
+
+ output_json_key_format(out, false, 3, "timestamp", "%" PRIi64, sample->time);
+ output_json_key_format(out, true, 3, "pid", "%i", al.thread->pid_);
+ output_json_key_format(out, true, 3, "tid", "%i", al.thread->tid);
+
+ if (al.thread->cpu >= 0)
+ output_json_key_format(out, true, 3, "cpu", "%i", al.thread->cpu);
+
+ output_json_key_string(out, true, 3, "comm", thread__comm_str(al.thread));
+
+ output_json_key_format(out, true, 3, "callchain", "[");
+ if (sample->callchain) {
+ unsigned int i;
+ bool ok;
+ bool first_callchain = true;
+
+ for (i = 0; i < sample->callchain->nr; ++i) {
+ u64 ip = sample->callchain->ips[i];
+
+ if (ip >= PERF_CONTEXT_MAX) {
+ switch (ip) {
+ case PERF_CONTEXT_HV:
+ cpumode = PERF_RECORD_MISC_HYPERVISOR;
+ break;
+ case PERF_CONTEXT_KERNEL:
+ cpumode = PERF_RECORD_MISC_KERNEL;
+ break;
+ case PERF_CONTEXT_USER:
+ cpumode = PERF_RECORD_MISC_USER;
+ break;
+ default:
+ pr_debug("invalid callchain context: %"
+ PRId64 "\n", (s64) ip);
+ break;
+ }
+ continue;
+ }
+
+ if (first_callchain)
+ first_callchain = false;
+ else
+ fputc(',', out);
+
+ ok = thread__find_symbol(al.thread, cpumode, ip, &tal);
+ output_sample_callchain_entry(tool, ip, ok ? &tal : NULL);
+ }
+ } else {
+ output_sample_callchain_entry(tool, sample->ip, &al);
+ }
+ output_json_format(out, false, 3, "]");
+
+ output_json_format(out, false, 2, "}");
+ return 0;
+}
+
+static void output_headers(struct perf_session *session, struct convert_json *c)
+{
+ struct stat st;
+ struct perf_header *header = &session->header;
+ int ret;
+ int fd = perf_data__fd(session->data);
+ int i;
+ FILE *out = c->out;
+
+ output_json_key_format(out, false, 2, "header-version", "%u", header->version);
+
+ ret = fstat(fd, &st);
+ if (ret >= 0) {
+ time_t stctime = st.st_mtime;
+ char buf[256];
+
+ strftime(buf, sizeof(buf), "%FT%TZ", gmtime(&stctime));
+ output_json_key_string(out, true, 2, "captured-on", buf);
+ } else {
+ pr_debug("Failed to get mtime of source file, not writing captured-on");
+ }
+
+ output_json_key_format(out, true, 2, "data-offset", "%" PRIu64, header->data_offset);
+ output_json_key_format(out, true, 2, "data-size", "%" PRIu64, header->data_size);
+ output_json_key_format(out, true, 2, "feat-offset", "%" PRIu64, header->feat_offset);
+
+ output_json_key_string(out, true, 2, "hostname", header->env.hostname);
+ output_json_key_string(out, true, 2, "os-release", header->env.os_release);
+ output_json_key_string(out, true, 2, "arch", header->env.arch);
+
+ output_json_key_string(out, true, 2, "cpu-desc", header->env.cpu_desc);
+ output_json_key_string(out, true, 2, "cpuid", header->env.cpuid);
+ output_json_key_format(out, true, 2, "nrcpus-online", "%u", header->env.nr_cpus_online);
+ output_json_key_format(out, true, 2, "nrcpus-avail", "%u", header->env.nr_cpus_avail);
+
+ if (header->env.clock.enabled) {
+ output_json_key_format(out, true, 2, "clockid",
+ "%u", header->env.clock.clockid);
+ output_json_key_format(out, true, 2, "clock-time",
+ "%" PRIu64, header->env.clock.clockid_ns);
+ output_json_key_format(out, true, 2, "real-time",
+ "%" PRIu64, header->env.clock.tod_ns);
+ }
+
+ output_json_key_string(out, true, 2, "perf-version", header->env.version);
+
+ output_json_key_format(out, true, 2, "cmdline", "[");
+ for (i = 0; i < header->env.nr_cmdline; i++) {
+ output_json_delimiters(out, i != 0, 3);
+ output_json_string(c->out, header->env.cmdline_argv[i]);
+ }
+ output_json_format(out, false, 2, "]");
+}
+
+int bt_convert__perf2json(const char *input_name, const char *output_name,
+ struct perf_data_convert_opts *opts __maybe_unused)
+{
+ struct perf_session *session;
+ int fd;
+ int ret = -1;
+
+ struct convert_json c = {
+ .tool = {
+ .sample = process_sample_event,
+ .mmap = perf_event__process_mmap,
+ .mmap2 = perf_event__process_mmap2,
+ .comm = perf_event__process_comm,
+ .namespaces = perf_event__process_namespaces,
+ .cgroup = perf_event__process_cgroup,
+ .exit = perf_event__process_exit,
+ .fork = perf_event__process_fork,
+ .lost = perf_event__process_lost,
+ .tracing_data = perf_event__process_tracing_data,
+ .build_id = perf_event__process_build_id,
+ .id_index = perf_event__process_id_index,
+ .auxtrace_info = perf_event__process_auxtrace_info,
+ .auxtrace = perf_event__process_auxtrace,
+ .event_update = perf_event__process_event_update,
+ .ordered_events = true,
+ .ordering_requires_timestamps = true,
+ },
+ .first = true,
+ .events_count = 0,
+ };
+
+ struct perf_data data = {
+ .mode = PERF_DATA_MODE_READ,
+ .path = input_name,
+ .force = opts->force,
+ };
+
+ if (opts->all) {
+ pr_err("--all is currently unsupported for JSON output.\n");
+ goto err;
+ }
+ if (opts->tod) {
+ pr_err("--tod is currently unsupported for JSON output.\n");
+ goto err;
+ }
+
+ fd = open(output_name, O_CREAT | O_WRONLY | (opts->force ? O_TRUNC : O_EXCL), 0666);
+ if (fd == -1) {
+ if (errno == EEXIST)
+ pr_err("Output file exists. Use --force to overwrite it.\n");
+ else
+ pr_err("Error opening output file!\n");
+ goto err;
+ }
+
+ c.out = fdopen(fd, "w");
+ if (!c.out) {
+ fprintf(stderr, "Error opening output file!\n");
+ close(fd);
+ goto err;
+ }
+
+ session = perf_session__new(&data, false, &c.tool);
+ if (IS_ERR(session)) {
+ fprintf(stderr, "Error creating perf session!\n");
+ goto err_fclose;
+ }
+
+ if (symbol__init(&session->header.env) < 0) {
+ fprintf(stderr, "Symbol init error!\n");
+ goto err_session_delete;
+ }
+
+ // The opening brace is printed manually because it isn't delimited from a
+ // previous value (i.e. we don't want a leading newline)
+ fputc('{', c.out);
+
+ // Version number for future-proofing. Most additions should be able to be
+ // done in a backwards-compatible way so this should only need to be bumped
+ // if some major breaking change must be made.
+ output_json_format(c.out, false, 1, "\"linux-perf-json-version\": 1");
+
+ // Output headers
+ output_json_format(c.out, true, 1, "\"headers\": {");
+ output_headers(session, &c);
+ output_json_format(c.out, false, 1, "}");
+
+ // Output samples
+ output_json_format(c.out, true, 1, "\"samples\": [");
+ perf_session__process_events(session);
+ output_json_format(c.out, false, 1, "]");
+ output_json_format(c.out, false, 0, "}");
+ fputc('\n', c.out);
+
+ fprintf(stderr,
+ "[ perf data convert: Converted '%s' into JSON data '%s' ]\n",
+ data.path, output_name);
+
+ fprintf(stderr,
+ "[ perf data convert: Converted and wrote %.3f MB (%" PRIu64 " samples) ]\n",
+ (ftell(c.out)) / 1024.0 / 1024.0, c.events_count);
+
+ ret = 0;
+err_session_delete:
+ perf_session__delete(session);
+err_fclose:
+ fclose(c.out);
+err:
+ return ret;
+}
diff --git a/tools/perf/util/data-convert.h b/tools/perf/util/data-convert.h
index feab5f114e37..1b4c5f598415 100644
--- a/tools/perf/util/data-convert.h
+++ b/tools/perf/util/data-convert.h
@@ -2,10 +2,20 @@
#ifndef __DATA_CONVERT_H
#define __DATA_CONVERT_H

+#include <stdbool.h>
+
struct perf_data_convert_opts {
bool force;
bool all;
bool tod;
};

+#ifdef HAVE_LIBBABELTRACE_SUPPORT
+int bt_convert__perf2ctf(const char *input_name, const char *to_ctf,
+ struct perf_data_convert_opts *opts);
+#endif /* HAVE_LIBBABELTRACE_SUPPORT */
+
+int bt_convert__perf2json(const char *input_name, const char *to_ctf,
+ struct perf_data_convert_opts *opts);
+
#endif /* __DATA_CONVERT_H */
--
2.31.1