From: Kan Liang <[email protected]>
This patch set supports per-sample freq/CPU%/CORE_BUSY% print in perf
report -D and --stdio.
For printing these information, the perf.data file must have been obtained
by group read and using special events cycles, ref-cycles, msr/tsc/,
msr/aperf/ or msr/mperf/.
- Freq (MHz): The frequency during the sample interval. Needs cycles
ref-cycles event.
- CPU%: CPU utilization during the sample interval. Needs ref-cycles and
msr/tsc/ events.
- CORE_BUSY%: actual percent performance (APERF/MPERF%) during the
sample interval. Needs msr/aperf/ and msr/mperf/ events.
For printing CPU% and CORE_BUSY%, please also apply the kernel patch.
http://marc.info/?l=linux-kernel&m=143747254926369&w=2
Here is an example:
$ perf record -e
'{cycles,ref-cycles,msr/tsc/,msr/mperf/,msr/aperf/}:S' ~/tchain_edit
$ perf report --stdio --group --show-freq-perf
Overhead FREQ MHz CPU% CORE_BUSY%
Command Shared Object Symbol
........................................ ......... ..... ..........
........... ................ ......................
99.54% 99.54% 99.53% 99.53% 99.53% 2301 96 99
tchain_edit tchain_edit [.] f3
0.20% 0.20% 0.20% 0.20% 0.20% 2301 98 99
tchain_edit tchain_edit [.] f2
0.05% 0.05% 0.05% 0.05% 0.05% 2300 98 99
tchain_edit [kernel.vmlinux] [k] read_tsc
Changes since V1:
- Save cpu max freq to header when recording
- Read cpu max freq and msr type from header when reporting
Changes since V2:
- Introduce generic FEAT for CPU related data stored
- Make cpu max freq and msr type part of perf_session_env
- rename cpu_u to cpu_util
- Don't save sample value in perf_sample and discards new iterator.
Calculating the freq_perf_info in add_entry_cb callback
- Introduce symbol_conf.freq_perf_type for related hpp column visibility
Kan Liang (5):
perf,tools: introduce generic FEAT for CPU attributes
perf,tools: read msr pmu type from header.
perf,tools: Dump per-sample freq/CPU%/CORE_BUSY% in report -D
perf,tools: caculate and save freq/CPU%/CORE_BUSY% in he_stat
perf,tools: Show freq/CPU%/CORE_BUSY% in perf report --stdio
tools/perf/Documentation/perf-report.txt | 12 ++++++
tools/perf/builtin-report.c | 56 +++++++++++++++++++++++++
tools/perf/ui/hist.c | 71 +++++++++++++++++++++++++++++---
tools/perf/util/cpumap.c | 32 ++++++++++++++
tools/perf/util/cpumap.h | 1 +
tools/perf/util/header.c | 38 +++++++++++++++++
tools/perf/util/header.h | 8 ++++
tools/perf/util/hist.h | 3 ++
tools/perf/util/session.c | 41 ++++++++++++++----
tools/perf/util/session.h | 47 +++++++++++++++++++++
tools/perf/util/sort.c | 3 ++
tools/perf/util/sort.h | 3 ++
tools/perf/util/symbol.h | 12 +++++-
13 files changed, 314 insertions(+), 13 deletions(-)
--
1.8.3.1
From: Kan Liang <[email protected]>
This patch introduces generic FEAT for CPU attributes. For the patch
set, we only need cpu max frequency. But it can be easily extented to
support more other CPU attributes.
The cpu max frequency is from the first online cpu.
Signed-off-by: Kan Liang <[email protected]>
---
tools/perf/util/cpumap.c | 32 ++++++++++++++++++++++++++++++++
tools/perf/util/cpumap.h | 1 +
tools/perf/util/header.c | 35 +++++++++++++++++++++++++++++++++++
tools/perf/util/header.h | 7 +++++++
4 files changed, 75 insertions(+)
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 3667e21..ef7e57e 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -499,3 +499,35 @@ int cpu__setup_cpunode_map(void)
closedir(dir1);
return 0;
}
+
+u64 get_cpu_max_freq(void)
+{
+ const char *mnt;
+ char path[PATH_MAX], tmp;
+ FILE *fp;
+ u64 freq;
+ int cpu = 0;
+ int ret;
+
+ mnt = sysfs__mountpoint();
+ if (!mnt)
+ return 0;
+
+ snprintf(path, PATH_MAX, "%s/devices/system/cpu/online", mnt);
+ fp = fopen(path, "r");
+ if (fp) {
+ ret = fscanf(fp, "%u%c", &cpu, &tmp);
+ fclose(fp);
+ if (ret < 1)
+ return 0;
+ }
+
+ snprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq", mnt, cpu);
+ fp = fopen(path, "r");
+ if (!fp)
+ return 0;
+ ret = fscanf(fp, "%lu", &freq);
+ fclose(fp);
+
+ return (ret == 1) ? freq : 0;
+}
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 0af9cec..8fd2bd6 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -58,6 +58,7 @@ int max_node_num;
int *cpunode_map;
int cpu__setup_cpunode_map(void);
+u64 get_cpu_max_freq(void);
static inline int cpu__max_node(void)
{
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 03ace57..31be7bc 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -862,6 +862,16 @@ write_it:
return do_write_string(fd, buffer);
}
+static int write_cpu_attributes(int fd, struct perf_header *h __maybe_unused,
+ struct perf_evlist *evlist __maybe_unused)
+{
+ u64 freq;
+
+ freq = get_cpu_max_freq();
+
+ return do_write(fd, &freq, sizeof(freq));
+}
+
static int write_branch_stack(int fd __maybe_unused,
struct perf_header *h __maybe_unused,
struct perf_evlist *evlist __maybe_unused)
@@ -1158,6 +1168,11 @@ static void print_cpuid(struct perf_header *ph, int fd __maybe_unused, FILE *fp)
fprintf(fp, "# cpuid : %s\n", ph->env.cpuid);
}
+static void print_cpu_attributes(struct perf_header *ph, int fd __maybe_unused, FILE *fp)
+{
+ fprintf(fp, "# CPU attributes: max frequency = %lu KHz\n", ph->env.cpu_attr[PERF_HEADER_CPU_MAX_FREQ]);
+}
+
static void print_branch_stack(struct perf_header *ph __maybe_unused,
int fd __maybe_unused, FILE *fp)
{
@@ -1471,6 +1486,25 @@ static int process_cpuid(struct perf_file_section *section __maybe_unused,
return ph->env.cpuid ? 0 : -ENOMEM;
}
+static int process_cpu_attributes(struct perf_file_section *section __maybe_unused,
+ struct perf_header *ph, int fd,
+ void *data __maybe_unused)
+{
+ ssize_t ret;
+ u64 nr;
+
+ ret = readn(fd, &nr, sizeof(nr));
+ if (ret != sizeof(nr))
+ return -1;
+
+ if (ph->needs_swap)
+ nr = bswap_64(nr);
+
+ ph->env.cpu_attr[PERF_HEADER_CPU_MAX_FREQ] = nr;
+
+ return 0;
+}
+
static int process_total_mem(struct perf_file_section *section __maybe_unused,
struct perf_header *ph, int fd,
void *data __maybe_unused)
@@ -1885,6 +1919,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
FEAT_OPP(HEADER_NRCPUS, nrcpus),
FEAT_OPP(HEADER_CPUDESC, cpudesc),
FEAT_OPP(HEADER_CPUID, cpuid),
+ FEAT_OPP(HEADER_CPU_ATTR, cpu_attributes),
FEAT_OPP(HEADER_TOTAL_MEM, total_mem),
FEAT_OPP(HEADER_EVENT_DESC, event_desc),
FEAT_OPP(HEADER_CMDLINE, cmdline),
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index d4d5796..fe1654c 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -22,6 +22,7 @@ enum {
HEADER_NRCPUS,
HEADER_CPUDESC,
HEADER_CPUID,
+ HEADER_CPU_ATTR,
HEADER_TOTAL_MEM,
HEADER_CMDLINE,
HEADER_EVENT_DESC,
@@ -66,6 +67,11 @@ struct perf_header;
int perf_file_header__read(struct perf_file_header *header,
struct perf_header *ph, int fd);
+enum perf_header_cpu_attr {
+ PERF_HEADER_CPU_MAX_FREQ = 0,
+ PERF_HEADER_CPU_ATTR_MAX,
+};
+
struct perf_session_env {
char *hostname;
char *os_release;
@@ -75,6 +81,7 @@ struct perf_session_env {
int nr_cpus_avail;
char *cpu_desc;
char *cpuid;
+ u64 cpu_attr[PERF_HEADER_CPU_ATTR_MAX];
unsigned long long total_mem;
int nr_cmdline;
--
1.8.3.1
From: Kan Liang <[email protected]>
Get msr pmu type when processing pmu_mappings
Signed-off-by: Kan Liang <[email protected]>
---
tools/perf/util/header.c | 3 +++
tools/perf/util/header.h | 1 +
2 files changed, 4 insertions(+)
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 31be7bc..3fa8503 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1766,6 +1766,9 @@ static int process_pmu_mappings(struct perf_file_section *section __maybe_unused
/* include a NULL character at the end */
strbuf_add(&sb, "", 1);
+ if (!strcmp(name, "msr"))
+ ph->env.msr_pmu_type = type;
+
free(name);
pmu_num--;
}
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index fe1654c..1863f6b 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -95,6 +95,7 @@ struct perf_session_env {
char *sibling_threads;
char *numa_nodes;
char *pmu_mappings;
+ unsigned int msr_pmu_type;
};
struct perf_header {
--
1.8.3.1
From: Kan Liang <[email protected]>
The group read results from cycles/ref-cycles/TSC/ASTATE/MSTATE event
can be used to calculate the frequency, CPU Utilization and percent
performance during each sampling period.
This patch shows them in report -D.
Here is an example:
$ perf record -e
'{cycles,ref-cycles,msr/tsc/,msr/mperf/,msr/aperf/}:S' ~/tchain_edit
Here is one sample from perf report -D
1972044565107 0x3498 [0x88]: PERF_RECORD_SAMPLE(IP, 0x2): 10608/10608:
0x4005fd period: 564686 addr: 0
... sample_read:
.... group nr 5
..... id 0000000000000012, value 0000000002143901
..... id 0000000000000052, value 0000000002143896
..... id 0000000000000094, value 00000000021e443d
..... id 00000000000000d4, value 00000000021db984
..... id 0000000000000114, value 00000000021db964
..... Freq 2301 MHz
..... CPU% 98%
..... CORE_BUSY% 99%
Signed-off-by: Kan Liang <[email protected]>
---
tools/perf/util/session.c | 41 ++++++++++++++++++++++++++++++++++-------
tools/perf/util/session.h | 47 +++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 81 insertions(+), 7 deletions(-)
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index ed9dc25..7971546 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -851,8 +851,16 @@ static void perf_evlist__print_tstamp(struct perf_evlist *evlist,
printf("%" PRIu64 " ", sample->time);
}
-static void sample_read__printf(struct perf_sample *sample, u64 read_format)
+static void sample_read__printf(struct perf_session *session,
+ struct perf_evlist *evlist,
+ struct perf_sample *sample,
+ u64 read_format)
{
+ struct perf_evsel *evsel;
+ struct perf_sample_id *sid;
+ u64 data[FREQ_PERF_MAX] = { 0 };
+ u64 cpu_max_freq = session->header.env.cpu_attr[PERF_HEADER_CPU_MAX_FREQ];
+
printf("... sample_read:\n");
if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
@@ -875,10 +883,26 @@ static void sample_read__printf(struct perf_sample *sample, u64 read_format)
printf("..... id %016" PRIx64
", value %016" PRIx64 "\n",
value->id, value->value);
+
+ sid = perf_evlist__id2sid(evlist, value->id);
+ evsel = sid->evsel;
+ if (evsel != NULL)
+ SET_FREQ_PERF_VALUE(session->header.env.msr_pmu_type,
+ evsel, data, value->value);
}
} else
printf("..... id %016" PRIx64 ", value %016" PRIx64 "\n",
sample->read.one.id, sample->read.one.value);
+
+ if (HAS_FREQ(data))
+ printf("..... Freq %lu MHz\n",
+ GET_FREQ(data, cpu_max_freq/1000));
+ if (HAS_CPU_UTIL(data))
+ printf("..... CPU%% %lu%%\n",
+ GET_CPU_UTIL(data));
+ if (HAS_CORE_BUSY(data))
+ printf("..... CORE_BUSY%% %lu%%\n",
+ GET_CORE_BUSY(data));
}
static void dump_event(struct perf_evlist *evlist, union perf_event *event,
@@ -899,7 +923,8 @@ static void dump_event(struct perf_evlist *evlist, union perf_event *event,
event->header.size, perf_event__name(event->header.type));
}
-static void dump_sample(struct perf_evsel *evsel, union perf_event *event,
+static void dump_sample(struct perf_session *session, struct perf_evlist *evlist,
+ struct perf_evsel *evsel, union perf_event *event,
struct perf_sample *sample)
{
u64 sample_type;
@@ -938,7 +963,7 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event,
printf("... transaction: %" PRIx64 "\n", sample->transaction);
if (sample_type & PERF_SAMPLE_READ)
- sample_read__printf(sample, evsel->attr.read_format);
+ sample_read__printf(session, evlist, sample, evsel->attr.read_format);
}
static struct machine *machines__find_for_cpumode(struct machines *machines,
@@ -1036,12 +1061,13 @@ static int
&sample->read.one, machine);
}
-static int machines__deliver_event(struct machines *machines,
+static int machines__deliver_event(struct perf_session *session,
struct perf_evlist *evlist,
union perf_event *event,
struct perf_sample *sample,
struct perf_tool *tool, u64 file_offset)
{
+ struct machines *machines = &session->machines;
struct perf_evsel *evsel;
struct machine *machine;
@@ -1053,11 +1079,12 @@ static int machines__deliver_event(struct machines *machines,
switch (event->header.type) {
case PERF_RECORD_SAMPLE:
- dump_sample(evsel, event, sample);
if (evsel == NULL) {
++evlist->stats.nr_unknown_id;
return 0;
}
+ dump_sample(session, evlist, evsel, event, sample);
+
if (machine == NULL) {
++evlist->stats.nr_unprocessable_samples;
return 0;
@@ -1113,7 +1140,7 @@ static int perf_session__deliver_event(struct perf_session *session,
if (ret > 0)
return 0;
- return machines__deliver_event(&session->machines, session->evlist,
+ return machines__deliver_event(session, session->evlist,
event, sample, tool, file_offset);
}
@@ -1179,7 +1206,7 @@ int perf_session__deliver_synth_event(struct perf_session *session,
if (event->header.type >= PERF_RECORD_USER_TYPE_START)
return perf_session__process_user_event(session, event, 0);
- return machines__deliver_event(&session->machines, evlist, event, sample, tool, 0);
+ return machines__deliver_event(session, evlist, event, sample, tool, 0);
}
static void event_swap(union perf_event *event, bool sample_id_all)
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index b44afc7..e6e408b 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -42,6 +42,53 @@ struct perf_session {
#define PRINT_IP_OPT_ONELINE (1<<4)
#define PRINT_IP_OPT_SRCLINE (1<<5)
+#define PERF_MSR_TSC 0
+#define PERF_MSR_APERF 1
+#define PERF_MSR_MPERF 2
+
+enum perf_freq_perf_index {
+ FREQ_PERF_TSC = 0,
+ FREQ_PERF_APERF = 1,
+ FREQ_PERF_MPERF = 2,
+ FREQ_PERF_CYCLES = 3,
+ FREQ_PERF_REF_CYCLES = 4,
+
+ FREQ_PERF_MAX
+};
+
+#define SET_FREQ_PERF_VALUE(msr_pmu_type, event, array, value) \
+{ \
+ if (event->attr.type == msr_pmu_type) { \
+ if (event->attr.config == PERF_MSR_TSC) \
+ array[FREQ_PERF_TSC] = value; \
+ if (event->attr.config == PERF_MSR_APERF) \
+ array[FREQ_PERF_APERF] = value; \
+ if (event->attr.config == PERF_MSR_MPERF) \
+ array[FREQ_PERF_MPERF] = value; \
+ } \
+ if (event->attr.type == PERF_TYPE_HARDWARE) { \
+ if (event->attr.config == PERF_COUNT_HW_CPU_CYCLES) \
+ array[FREQ_PERF_CYCLES] = value; \
+ if (event->attr.config == PERF_COUNT_HW_REF_CPU_CYCLES) \
+ array[FREQ_PERF_REF_CYCLES] = value; \
+ } \
+}
+
+#define HAS_FREQ(array) \
+ ((array[FREQ_PERF_CYCLES] > 0) && (array[FREQ_PERF_REF_CYCLES] > 0))
+#define GET_FREQ(array, cpu_max_freq) \
+ ((array[FREQ_PERF_CYCLES] * cpu_max_freq) / array[FREQ_PERF_REF_CYCLES])
+
+#define HAS_CPU_UTIL(array) \
+ ((array[FREQ_PERF_TSC] > 0) && (array[FREQ_PERF_REF_CYCLES] > 0))
+#define GET_CPU_UTIL(array) \
+ ((100 * array[FREQ_PERF_REF_CYCLES]) / array[FREQ_PERF_TSC])
+
+#define HAS_CORE_BUSY(array) \
+ ((array[FREQ_PERF_APERF] > 0) && (array[FREQ_PERF_MPERF] > 0))
+#define GET_CORE_BUSY(array) \
+ ((100 * array[FREQ_PERF_APERF]) / array[FREQ_PERF_MPERF])
+
struct perf_tool;
struct perf_session *perf_session__new(struct perf_data_file *file,
--
1.8.3.1
From: Kan Liang <[email protected]>
Caculate freq/CPU%/CORE_BUSY% in add_entry_cb, and update the value in
he_stat.
Signed-off-by: Kan Liang <[email protected]>
---
tools/perf/builtin-report.c | 36 ++++++++++++++++++++++++++++++++++++
tools/perf/util/sort.h | 3 +++
2 files changed, 39 insertions(+)
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 95a4771..3810251 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -88,6 +88,38 @@ static int report__config(const char *var, const char *value, void *cb)
return perf_default_config(var, value, cb);
}
+static void set_he_freq_perf(struct perf_session *session,
+ struct hist_entry_iter *iter)
+{
+ struct hist_entry *he = iter->he;
+ struct perf_evsel *evsel = iter->evsel;
+ struct perf_evsel *leader = evsel;
+ struct perf_sample *sample = iter->sample;
+ struct perf_evlist *evlist = session->evlist;
+ u64 cpu_max_freq = session->header.env.cpu_attr[PERF_HEADER_CPU_MAX_FREQ];
+ u64 data[FREQ_PERF_MAX] = { 0 };
+ u64 nr = 0;
+
+ SET_FREQ_PERF_VALUE(session->header.env.msr_pmu_type,
+ evsel, data,
+ sample->read.group.values[nr].value);
+ evlist__for_each_continue(evlist, evsel) {
+ if ((evsel->leader != leader) ||
+ (++nr >= sample->read.group.nr))
+ break;
+ SET_FREQ_PERF_VALUE(session->header.env.msr_pmu_type,
+ evsel, data,
+ sample->read.group.values[nr].value);
+ }
+
+ if (HAS_FREQ(data))
+ he->stat.freq = GET_FREQ(data, cpu_max_freq/1000);
+ if (HAS_CPU_UTIL(data))
+ he->stat.cpu_util = GET_CPU_UTIL(data);
+ if (HAS_CORE_BUSY(data))
+ he->stat.core_busy = GET_CORE_BUSY(data);
+}
+
static int hist_iter__report_callback(struct hist_entry_iter *iter,
struct addr_location *al, bool single,
void *arg)
@@ -99,6 +131,10 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter,
struct mem_info *mi;
struct branch_info *bi;
+ if ((iter->ops == &hist_iter_normal) &&
+ perf_evsel__is_group_leader(evsel))
+ set_he_freq_perf(rep->session, iter);
+
if (!ui__has_annotation())
return 0;
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index e97cd47..df1d908 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -54,6 +54,9 @@ struct he_stat {
u64 period_guest_us;
u64 weight;
u32 nr_events;
+ u64 freq;
+ u64 cpu_util;
+ u64 core_busy;
};
struct hist_entry_diff {
--
1.8.3.1
From: Kan Liang <[email protected]>
Show frequency, CPU Utilization and percent performance for each symbol
in perf report by --stdio --show-freq-perf
In sampling group, only group leader do sampling. So only need to print
group leader's freq in --group.
Here is an example.
$ perf report --stdio --group --show-freq-perf
Overhead FREQ MHz CPU% CORE_BUSY%
Command Shared Object Symbol
........................................ ......... ..... ..........
........... ................ ......................
99.54% 99.54% 99.53% 99.53% 99.53% 2301 96 99
tchain_edit tchain_edit [.] f3
0.20% 0.20% 0.20% 0.20% 0.20% 2301 98 99
tchain_edit tchain_edit [.] f2
0.05% 0.05% 0.05% 0.05% 0.05% 2300 98 99
tchain_edit [kernel.vmlinux] [k] read_tsc
Signed-off-by: Kan Liang <[email protected]>
---
tools/perf/Documentation/perf-report.txt | 12 ++++++
tools/perf/builtin-report.c | 22 +++++++++-
tools/perf/ui/hist.c | 71 +++++++++++++++++++++++++++++---
tools/perf/util/hist.h | 3 ++
tools/perf/util/sort.c | 3 ++
tools/perf/util/symbol.h | 12 +++++-
6 files changed, 116 insertions(+), 7 deletions(-)
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index c33b69f..faa8825 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -303,6 +303,18 @@ OPTIONS
special event -e cpu/mem-loads/ or -e cpu/mem-stores/. See
'perf mem' for simpler access.
+--show-freq-perf::
+ Show CPU frequency and performance result from sample read.
+ To generate the frequency and performance output, the perf.data file
+ must have been obtained by group read and using special events cycles,
+ ref-cycles, msr/tsc/, msr/aperf/ or msr/mperf/
+ Freq MHz: The frequency during the sample interval. Needs cycles and
+ ref-cycles event.
+ CPU%: CPU utilization during the sample interval. Needs ref-cycles and
+ msr/tsc/ events.
+ CORE_BUSY%: actual percent performance (APERF/MPERF%) during the
+ sample interval. Needs msr/aperf/ and msr/mperf/ events.
+
--percent-limit::
Do not show entries which have an overhead under that percent.
(Default: 0).
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 3810251..cabbd6b5 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -132,7 +132,8 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter,
struct branch_info *bi;
if ((iter->ops == &hist_iter_normal) &&
- perf_evsel__is_group_leader(evsel))
+ perf_evsel__is_group_leader(evsel) &&
+ symbol_conf.show_freq_perf)
set_he_freq_perf(rep->session, iter);
if (!ui__has_annotation())
@@ -757,6 +758,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel,
"Enable kernel symbol demangling"),
OPT_BOOLEAN(0, "mem-mode", &report.mem_mode, "mem access profile"),
+ OPT_BOOLEAN(0, "show-freq-perf", &symbol_conf.show_freq_perf,
+ "show CPU freqency and performance info"),
OPT_CALLBACK(0, "percent-limit", &report, "percent",
"Don't show entries under that percent", parse_percent_limit),
OPT_CALLBACK(0, "percentage", NULL, "relative|absolute",
@@ -769,7 +772,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
struct perf_data_file file = {
.mode = PERF_DATA_MODE_READ,
};
+ struct perf_evsel *pos;
int ret = hists__init();
+ bool freq_perf_info[FREQ_PERF_MAX] = {0};
if (ret < 0)
return ret;
@@ -854,6 +859,21 @@ repeat:
symbol_conf.cumulate_callchain = false;
}
+
+ if (symbol_conf.show_freq_perf) {
+ memset(symbol_conf.freq_perf_type, false, sizeof(bool) * DISPLAY_MAX);
+ evlist__for_each(session->evlist, pos) {
+ SET_FREQ_PERF_VALUE(session->header.env.msr_pmu_type,
+ pos, freq_perf_info, true);
+ }
+ if (HAS_FREQ(freq_perf_info))
+ symbol_conf.freq_perf_type[DISPLAY_FREQ] = true;
+ if (HAS_CPU_UTIL(freq_perf_info))
+ symbol_conf.freq_perf_type[DISPLAY_CPU_UTIL] = true;
+ if (HAS_CORE_BUSY(freq_perf_info))
+ symbol_conf.freq_perf_type[DISPLAY_CORE_BUSY] = true;
+ }
+
if (setup_sorting() < 0) {
if (sort_order)
parse_options_usage(report_usage, options, "s", 1);
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 25d6083..ad49c24 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -17,7 +17,7 @@
static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
hpp_field_fn get_field, const char *fmt, int len,
- hpp_snprint_fn print_fn, bool fmt_percent)
+ hpp_snprint_fn print_fn, bool fmt_percent, bool single)
{
int ret;
struct hists *hists = he->hists;
@@ -36,7 +36,7 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
} else
ret = hpp__call_print_fn(hpp, print_fn, fmt, len, get_field(he));
- if (perf_evsel__is_group_event(evsel)) {
+ if (perf_evsel__is_group_event(evsel) && !single) {
int prev_idx, idx_delta;
struct hist_entry *pair;
int nr_members = evsel->nr_members;
@@ -109,10 +109,16 @@ int hpp__fmt(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
const char *fmtstr, hpp_snprint_fn print_fn, bool fmt_percent)
{
int len = fmt->user_len ?: fmt->len;
+ bool single = false;
+
+ if (((fmt == &perf_hpp__format[PERF_HPP__FREQ]) ||
+ (fmt == &perf_hpp__format[PERF_HPP__CPU_UTIL]) ||
+ (fmt == &perf_hpp__format[PERF_HPP__CORE_BUSY])))
+ single = true;
if (symbol_conf.field_sep) {
return __hpp__fmt(hpp, he, get_field, fmtstr, 1,
- print_fn, fmt_percent);
+ print_fn, fmt_percent, single);
}
if (fmt_percent)
@@ -120,7 +126,7 @@ int hpp__fmt(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
else
len -= 1;
- return __hpp__fmt(hpp, he, get_field, fmtstr, len, print_fn, fmt_percent);
+ return __hpp__fmt(hpp, he, get_field, fmtstr, len, print_fn, fmt_percent, single);
}
int hpp__fmt_acc(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
@@ -234,6 +240,30 @@ static int hpp__header_fn(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
return scnprintf(hpp->buf, hpp->size, "%*s", len, fmt->name);
}
+static int hpp__single_width_fn(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp __maybe_unused,
+ struct perf_evsel *evsel)
+{
+ int len = fmt->user_len ?: fmt->len;
+
+ if (symbol_conf.event_group && !symbol_conf.show_freq_perf)
+ len = max(len, evsel->nr_members * fmt->len);
+
+ if (len < (int)strlen(fmt->name))
+ len = strlen(fmt->name);
+
+ return len;
+}
+
+static int hpp__single_header_fn(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct perf_evsel *evsel)
+{
+ int len = hpp__single_width_fn(fmt, hpp, evsel);
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", len, fmt->name);
+}
+
+
static int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...)
{
va_list args;
@@ -363,6 +393,9 @@ HPP_PERCENT_ACC_FNS(overhead_acc, period)
HPP_RAW_FNS(samples, nr_events)
HPP_RAW_FNS(period, period)
+HPP_RAW_FNS(freq, freq)
+HPP_RAW_FNS(cpu_util, cpu_util)
+HPP_RAW_FNS(core_busy, core_busy)
static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
struct hist_entry *a __maybe_unused,
@@ -395,6 +428,17 @@ static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
.sort = hpp__sort_ ## _fn, \
}
+#define HPP__SINGLE_PRINT_FNS(_name, _fn) \
+ { \
+ .name = _name, \
+ .header = hpp__single_header_fn, \
+ .width = hpp__single_width_fn, \
+ .entry = hpp__entry_ ## _fn, \
+ .cmp = hpp__nop_cmp, \
+ .collapse = hpp__nop_cmp, \
+ .sort = hpp__sort_ ## _fn, \
+ }
+
#define HPP__PRINT_FNS(_name, _fn) \
{ \
.name = _name, \
@@ -414,7 +458,10 @@ struct perf_hpp_fmt perf_hpp__format[] = {
HPP__COLOR_PRINT_FNS("guest usr", overhead_guest_us),
HPP__COLOR_ACC_PRINT_FNS("Children", overhead_acc),
HPP__PRINT_FNS("Samples", samples),
- HPP__PRINT_FNS("Period", period)
+ HPP__PRINT_FNS("Period", period),
+ HPP__SINGLE_PRINT_FNS("FREQ MHz", freq),
+ HPP__SINGLE_PRINT_FNS("CPU%", cpu_util),
+ HPP__SINGLE_PRINT_FNS("CORE_BUSY%", core_busy)
};
LIST_HEAD(perf_hpp__list);
@@ -485,6 +532,15 @@ void perf_hpp__init(void)
if (symbol_conf.show_total_period)
perf_hpp__column_enable(PERF_HPP__PERIOD);
+ if (symbol_conf.show_freq_perf) {
+ if (symbol_conf.freq_perf_type[DISPLAY_FREQ])
+ perf_hpp__column_enable(PERF_HPP__FREQ);
+ if (symbol_conf.freq_perf_type[DISPLAY_CPU_UTIL])
+ perf_hpp__column_enable(PERF_HPP__CPU_UTIL);
+ if (symbol_conf.freq_perf_type[DISPLAY_CORE_BUSY])
+ perf_hpp__column_enable(PERF_HPP__CORE_BUSY);
+ }
+
/* prepend overhead field for backward compatiblity. */
list = &perf_hpp__format[PERF_HPP__OVERHEAD].sort_list;
if (list_empty(list))
@@ -652,6 +708,9 @@ void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists)
return;
switch (idx) {
+ case PERF_HPP__CPU_UTIL:
+ fmt->len = 5;
+ break;
case PERF_HPP__OVERHEAD:
case PERF_HPP__OVERHEAD_SYS:
case PERF_HPP__OVERHEAD_US:
@@ -661,6 +720,8 @@ void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists)
case PERF_HPP__OVERHEAD_GUEST_SYS:
case PERF_HPP__OVERHEAD_GUEST_US:
+ case PERF_HPP__FREQ:
+ case PERF_HPP__CORE_BUSY:
fmt->len = 9;
break;
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 5ed8d9c..9b50e07 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -235,6 +235,9 @@ enum {
PERF_HPP__OVERHEAD_ACC,
PERF_HPP__SAMPLES,
PERF_HPP__PERIOD,
+ PERF_HPP__FREQ,
+ PERF_HPP__CPU_UTIL,
+ PERF_HPP__CORE_BUSY,
PERF_HPP__MAX_INDEX
};
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 4c65a14..e75c68a 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -1225,6 +1225,9 @@ static struct hpp_dimension hpp_sort_dimensions[] = {
DIM(PERF_HPP__OVERHEAD_ACC, "overhead_children"),
DIM(PERF_HPP__SAMPLES, "sample"),
DIM(PERF_HPP__PERIOD, "period"),
+ DIM(PERF_HPP__FREQ, "freq"),
+ DIM(PERF_HPP__CPU_UTIL, "cpu_u"),
+ DIM(PERF_HPP__CORE_BUSY, "core_busy"),
};
#undef DIM
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index b98ce51..467ecc4 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -80,6 +80,14 @@ static inline size_t symbol__size(const struct symbol *sym)
struct strlist;
struct intlist;
+enum freq_perf_type_index {
+ DISPLAY_FREQ = 0,
+ DISPLAY_CPU_UTIL,
+ DISPLAY_CORE_BUSY,
+
+ DISPLAY_MAX
+};
+
struct symbol_conf {
unsigned short priv_size;
unsigned short nr_events;
@@ -106,7 +114,9 @@ struct symbol_conf {
filter_relative,
show_hist_headers,
branch_callstack,
- has_filter;
+ has_filter,
+ show_freq_perf,
+ freq_perf_type[DISPLAY_MAX];
const char *vmlinux_name,
*kallsyms_name,
*source_prefix,
--
1.8.3.1
On Tue, Jul 28, 2015 at 07:29:31AM -0400, [email protected] wrote:
SNIP
>
> +static void print_cpu_attributes(struct perf_header *ph, int fd __maybe_unused, FILE *fp)
> +{
> + fprintf(fp, "# CPU attributes: max frequency = %lu KHz\n", ph->env.cpu_attr[PERF_HEADER_CPU_MAX_FREQ]);
> +}
> +
> static void print_branch_stack(struct perf_header *ph __maybe_unused,
> int fd __maybe_unused, FILE *fp)
> {
> @@ -1471,6 +1486,25 @@ static int process_cpuid(struct perf_file_section *section __maybe_unused,
> return ph->env.cpuid ? 0 : -ENOMEM;
> }
>
> +static int process_cpu_attributes(struct perf_file_section *section __maybe_unused,
> + struct perf_header *ph, int fd,
> + void *data __maybe_unused)
> +{
> + ssize_t ret;
> + u64 nr;
> +
> + ret = readn(fd, &nr, sizeof(nr));
> + if (ret != sizeof(nr))
> + return -1;
you need to ensure backward compatibility, like new perf with
this section extended will be able to read old (current)
perf.data file.. and the other way round probably too
adding a identifier 'tag' for each value might do it
jirka
On Tue, Jul 28, 2015 at 07:29:33AM -0400, [email protected] wrote:
SNIP
> -static void dump_sample(struct perf_evsel *evsel, union perf_event *event,
> +static void dump_sample(struct perf_session *session, struct perf_evlist *evlist,
> + struct perf_evsel *evsel, union perf_event *event,
> struct perf_sample *sample)
> {
> u64 sample_type;
> @@ -938,7 +963,7 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event,
> printf("... transaction: %" PRIx64 "\n", sample->transaction);
>
> if (sample_type & PERF_SAMPLE_READ)
> - sample_read__printf(sample, evsel->attr.read_format);
> + sample_read__printf(session, evlist, sample, evsel->attr.read_format);
> }
>
> static struct machine *machines__find_for_cpumode(struct machines *machines,
> @@ -1036,12 +1061,13 @@ static int
> &sample->read.one, machine);
> }
>
> -static int machines__deliver_event(struct machines *machines,
> +static int machines__deliver_event(struct perf_session *session,
> struct perf_evlist *evlist,
you can cut the evlist argument as well, because it's always from session->evlist
jirka
On Tue, Jul 28, 2015 at 07:29:33AM -0400, [email protected] wrote:
SNIP
>
> -static int machines__deliver_event(struct machines *machines,
> +static int machines__deliver_event(struct perf_session *session,
> struct perf_evlist *evlist,
> union perf_event *event,
> struct perf_sample *sample,
> struct perf_tool *tool, u64 file_offset)
> {
> + struct machines *machines = &session->machines;
> struct perf_evsel *evsel;
> struct machine *machine;
>
> @@ -1053,11 +1079,12 @@ static int machines__deliver_event(struct machines *machines,
>
> switch (event->header.type) {
> case PERF_RECORD_SAMPLE:
> - dump_sample(evsel, event, sample);
> if (evsel == NULL) {
> ++evlist->stats.nr_unknown_id;
> return 0;
> }
> + dump_sample(session, evlist, evsel, event, sample);
same here, you could pass only session all the way through
jirka
On Tue, Jul 28, 2015 at 07:29:33AM -0400, [email protected] wrote:
SNIP
>
> static void event_swap(union perf_event *event, bool sample_id_all)
> diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
> index b44afc7..e6e408b 100644
> --- a/tools/perf/util/session.h
> +++ b/tools/perf/util/session.h
> @@ -42,6 +42,53 @@ struct perf_session {
> #define PRINT_IP_OPT_ONELINE (1<<4)
> #define PRINT_IP_OPT_SRCLINE (1<<5)
>
> +#define PERF_MSR_TSC 0
> +#define PERF_MSR_APERF 1
> +#define PERF_MSR_MPERF 2
> +
> +enum perf_freq_perf_index {
> + FREQ_PERF_TSC = 0,
> + FREQ_PERF_APERF = 1,
> + FREQ_PERF_MPERF = 2,
> + FREQ_PERF_CYCLES = 3,
> + FREQ_PERF_REF_CYCLES = 4,
> +
> + FREQ_PERF_MAX
> +};
hum, rather than adding macros over anonymous array,
how about add perf_freq_t like:
typedef int perf_freq_t[FREQ_PERF_MAX];
and turn macros below into functions like:
SET_FREQ_PERF_VALUE perf_freq__init
HAS_FREQ perf_freq__has_freq
GET_FREQ perf_freq__get_freq
HAS_CPU_UTIL perf_freq__has_cpu_util
GET_CPU_UTIL perf_freq__get_cpu_util
HAS_CORE_BUSY perf_freq__has_core_busy
GET_CORE_BUSY perf_freq__get_core_busy
jirka
> +
> +#define SET_FREQ_PERF_VALUE(msr_pmu_type, event, array, value) \
> +{ \
> + if (event->attr.type == msr_pmu_type) { \
> + if (event->attr.config == PERF_MSR_TSC) \
> + array[FREQ_PERF_TSC] = value; \
> + if (event->attr.config == PERF_MSR_APERF) \
> + array[FREQ_PERF_APERF] = value; \
> + if (event->attr.config == PERF_MSR_MPERF) \
> + array[FREQ_PERF_MPERF] = value; \
> + } \
> + if (event->attr.type == PERF_TYPE_HARDWARE) { \
> + if (event->attr.config == PERF_COUNT_HW_CPU_CYCLES) \
> + array[FREQ_PERF_CYCLES] = value; \
> + if (event->attr.config == PERF_COUNT_HW_REF_CPU_CYCLES) \
> + array[FREQ_PERF_REF_CYCLES] = value; \
> + } \
> +}
> +
> +#define HAS_FREQ(array) \
> + ((array[FREQ_PERF_CYCLES] > 0) && (array[FREQ_PERF_REF_CYCLES] > 0))
> +#define GET_FREQ(array, cpu_max_freq) \
> + ((array[FREQ_PERF_CYCLES] * cpu_max_freq) / array[FREQ_PERF_REF_CYCLES])
> +
> +#define HAS_CPU_UTIL(array) \
> + ((array[FREQ_PERF_TSC] > 0) && (array[FREQ_PERF_REF_CYCLES] > 0))
> +#define GET_CPU_UTIL(array) \
> + ((100 * array[FREQ_PERF_REF_CYCLES]) / array[FREQ_PERF_TSC])
> +
> +#define HAS_CORE_BUSY(array) \
> + ((array[FREQ_PERF_APERF] > 0) && (array[FREQ_PERF_MPERF] > 0))
> +#define GET_CORE_BUSY(array) \
> + ((100 * array[FREQ_PERF_APERF]) / array[FREQ_PERF_MPERF])
SNIP
On Tue, Jul 28, 2015 at 07:29:33AM -0400, [email protected] wrote:
SNIP
>
> -static void sample_read__printf(struct perf_sample *sample, u64 read_format)
> +static void sample_read__printf(struct perf_session *session,
> + struct perf_evlist *evlist,
> + struct perf_sample *sample,
> + u64 read_format)
> {
> + struct perf_evsel *evsel;
> + struct perf_sample_id *sid;
> + u64 data[FREQ_PERF_MAX] = { 0 };
> + u64 cpu_max_freq = session->header.env.cpu_attr[PERF_HEADER_CPU_MAX_FREQ];
> +
> printf("... sample_read:\n");
>
> if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
> @@ -875,10 +883,26 @@ static void sample_read__printf(struct perf_sample *sample, u64 read_format)
> printf("..... id %016" PRIx64
> ", value %016" PRIx64 "\n",
> value->id, value->value);
should 2 hunks hunk below go under symbol_conf.show_freq_perf condition ?
jirka
> +
> + sid = perf_evlist__id2sid(evlist, value->id);
> + evsel = sid->evsel;
> + if (evsel != NULL)
> + SET_FREQ_PERF_VALUE(session->header.env.msr_pmu_type,
> + evsel, data, value->value);
> }
> } else
> printf("..... id %016" PRIx64 ", value %016" PRIx64 "\n",
> sample->read.one.id, sample->read.one.value);
> +
> + if (HAS_FREQ(data))
> + printf("..... Freq %lu MHz\n",
> + GET_FREQ(data, cpu_max_freq/1000));
> + if (HAS_CPU_UTIL(data))
> + printf("..... CPU%% %lu%%\n",
> + GET_CPU_UTIL(data));
> + if (HAS_CORE_BUSY(data))
> + printf("..... CORE_BUSY%% %lu%%\n",
> + GET_CORE_BUSY(data));
> }
>
SNIP
On Tue, Jul 28, 2015 at 07:29:35AM -0400, [email protected] wrote:
SNIP
> #undef DIM
> diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
> index b98ce51..467ecc4 100644
> --- a/tools/perf/util/symbol.h
> +++ b/tools/perf/util/symbol.h
> @@ -80,6 +80,14 @@ static inline size_t symbol__size(const struct symbol *sym)
> struct strlist;
> struct intlist;
>
> +enum freq_perf_type_index {
> + DISPLAY_FREQ = 0,
> + DISPLAY_CPU_UTIL,
> + DISPLAY_CORE_BUSY,
> +
> + DISPLAY_MAX
> +};
> +
> struct symbol_conf {
> unsigned short priv_size;
> unsigned short nr_events;
> @@ -106,7 +114,9 @@ struct symbol_conf {
> filter_relative,
> show_hist_headers,
> branch_callstack,
> - has_filter;
> + has_filter,
> + show_freq_perf,
> + freq_perf_type[DISPLAY_MAX];
IMO bitmask of 'enum freq_perf_type_index' would be better
jirka
Em Wed, Jul 29, 2015 at 02:52:20PM +0200, Jiri Olsa escreveu:
> On Tue, Jul 28, 2015 at 07:29:33AM -0400, [email protected] wrote:
>
> SNIP
>
> > -static void dump_sample(struct perf_evsel *evsel, union perf_event *event,
> > +static void dump_sample(struct perf_session *session, struct perf_evlist *evlist,
> > + struct perf_evsel *evsel, union perf_event *event,
> > struct perf_sample *sample)
> > {
> > u64 sample_type;
> > @@ -938,7 +963,7 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event,
> > printf("... transaction: %" PRIx64 "\n", sample->transaction);
> >
> > if (sample_type & PERF_SAMPLE_READ)
> > - sample_read__printf(sample, evsel->attr.read_format);
> > + sample_read__printf(session, evlist, sample, evsel->attr.read_format);
> > }
> >
> > static struct machine *machines__find_for_cpumode(struct machines *machines,
> > @@ -1036,12 +1061,13 @@ static int
> > &sample->read.one, machine);
> > }
> >
> > -static int machines__deliver_event(struct machines *machines,
> > +static int machines__deliver_event(struct perf_session *session,
> > struct perf_evlist *evlist,
>
> you can cut the evlist argument as well, because it's always from session->evlist
Well, perf_session is about saving/restoring from a perf.data file, and
there are tools that don't deal with perf.data files.
So I think that this running environment information, that is associated
with the evlist, should be accessible from there, i.e. when reading the
perf.data file you get it from there, when running a live tool, you get
it from the system.
I saw code in this patchkit that reads it from the system and writes it
to the perf.data header, that could sould instead get it into a data
structure accessible from the evlist and when writing a perf.data file,
get it from there.
I.e. trying not to entangle session stuff into places that don't touch
it.
Reading a bit more to see if I can come with more suggestions...
- Arnaldo
Em Wed, Jul 29, 2015 at 02:53:29PM +0200, Jiri Olsa escreveu:
> On Tue, Jul 28, 2015 at 07:29:33AM -0400, [email protected] wrote:
>
> SNIP
>
> >
> > -static int machines__deliver_event(struct machines *machines,
> > +static int machines__deliver_event(struct perf_session *session,
> > struct perf_evlist *evlist,
> > union perf_event *event,
> > struct perf_sample *sample,
> > struct perf_tool *tool, u64 file_offset)
> > {
> > + struct machines *machines = &session->machines;
> > struct perf_evsel *evsel;
> > struct machine *machine;
> >
> > @@ -1053,11 +1079,12 @@ static int machines__deliver_event(struct machines *machines,
> >
> > switch (event->header.type) {
> > case PERF_RECORD_SAMPLE:
> > - dump_sample(evsel, event, sample);
> > if (evsel == NULL) {
> > ++evlist->stats.nr_unknown_id;
> > return 0;
> > }
> > + dump_sample(session, evlist, evsel, event, sample);
>
> same here, you could pass only session all the way through
I'll take a look at how interesting it would be to have a evsel->evlist,
that if NULL means the evsel is freestanding, but when it is linked to
an evlist, then it will be there. This way we wouldn't have to pass
(evlist, evsel) when the main purpose of a function is about an evsel
but we need information that is logically associated to all evsels in a
list, i.e. that is in evsel->evlist.
- Arnaldo
Em Wed, Jul 29, 2015 at 12:43:22PM -0300, Arnaldo Carvalho de Melo escreveu:
> Em Wed, Jul 29, 2015 at 02:53:29PM +0200, Jiri Olsa escreveu:
> > > switch (event->header.type) {
> > > case PERF_RECORD_SAMPLE:
> > > - dump_sample(evsel, event, sample);
> > > if (evsel == NULL) {
> > > ++evlist->stats.nr_unknown_id;
> > > return 0;
> > > }
> > > + dump_sample(session, evlist, evsel, event, sample);
> >
> > same here, you could pass only session all the way through
>
> I'll take a look at how interesting it would be to have a evsel->evlist,
> that if NULL means the evsel is freestanding, but when it is linked to
> an evlist, then it will be there. This way we wouldn't have to pass
> (evlist, evsel) when the main purpose of a function is about an evsel
> but we need information that is logically associated to all evsels in a
> list, i.e. that is in evsel->evlist.
There are no cases where a function receives (evsel, evlist) with that
evlist containing that evsel :-\
Perhaps this will be the first, i.e. rename perf_session_env to
perf_env, then store it in evlist->env, then when processing something
where we have a evsel or evlist we can access that env from:
evsel->evlist->env;
Will continue after lunch, trying to prototype what I just described.
What I have is in a tmp.perf/core branch in my tree..
- Arnaldo
> Em Wed, Jul 29, 2015 at 12:43:22PM -0300, Arnaldo Carvalho de Melo
> escreveu:
> > Em Wed, Jul 29, 2015 at 02:53:29PM +0200, Jiri Olsa escreveu:
> > > > switch (event->header.type) {
> > > > case PERF_RECORD_SAMPLE:
> > > > - dump_sample(evsel, event, sample);
> > > > if (evsel == NULL) {
> > > > ++evlist->stats.nr_unknown_id;
> > > > return 0;
> > > > }
> > > > + dump_sample(session, evlist, evsel, event, sample);
> > >
> > > same here, you could pass only session all the way through
> >
> > I'll take a look at how interesting it would be to have a
> > evsel->evlist, that if NULL means the evsel is freestanding, but when
> > it is linked to an evlist, then it will be there. This way we wouldn't
> > have to pass (evlist, evsel) when the main purpose of a function is
> > about an evsel but we need information that is logically associated to
> > all evsels in a list, i.e. that is in evsel->evlist.
>
> There are no cases where a function receives (evsel, evlist) with that evlist
> containing that evsel :-\
>
> Perhaps this will be the first, i.e. rename perf_session_env to perf_env,
> then store it in evlist->env, then when processing something where we
> have a evsel or evlist we can access that env from:
>
> evsel->evlist->env;
>
> Will continue after lunch, trying to prototype what I just described.
>
Hi Arnaldo,
Have you got a chance to implement the prototype for evlist->env?
Thanks,
Kan
Em Tue, Aug 04, 2015 at 05:07:40PM +0000, Liang, Kan escreveu:
> > Em Wed, Jul 29, 2015 at 12:43:22PM -0300, Arnaldo Carvalho de Melo
> > > I'll take a look at how interesting it would be to have a
> > > evsel->evlist, that if NULL means the evsel is freestanding, but when
> > > it is linked to an evlist, then it will be there. This way we wouldn't
> > > have to pass (evlist, evsel) when the main purpose of a function is
> > > about an evsel but we need information that is logically associated to
> > > all evsels in a list, i.e. that is in evsel->evlist.
> > There are no cases where a function receives (evsel, evlist) with that evlist
> > containing that evsel :-\
> > Perhaps this will be the first, i.e. rename perf_session_env to perf_env,
> > then store it in evlist->env, then when processing something where we
> > have a evsel or evlist we can access that env from:
> > evsel->evlist->env;
> > Will continue after lunch, trying to prototype what I just described.
> Hi Arnaldo,
> Have you got a chance to implement the prototype for evlist->env?
Not really, got sidetracked :-\
- Arnaldo